Student0809 committed on
Commit
14e1dca
·
verified ·
1 Parent(s): fb79a6d

Add files using upload-large-folder tool

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. ms-swift/.ipynb_checkpoints/dataset_OSST-checkpoint.json +0 -0
  2. ms-swift/scripts/benchmark/exp_utils.py +391 -0
  3. ms-swift/scripts/utils/plot_loss.py +9 -0
  4. ms-swift/silence_overlaps/700/.ipynb_checkpoints/silence_isoverlap_train-checkpoint.json +1152 -0
  5. ms-swift/silence_overlaps/700/original/silence_isoverlaps.json +0 -0
  6. ms-swift/silence_overlaps/700/test/.ipynb_checkpoints/overlap5s_silence_segments_test-checkpoint.json +27 -0
  7. ms-swift/silence_overlaps/700/test/overlap5s_speaker_segments_test.json +27 -0
  8. ms-swift/silence_overlaps/700/test/silence_speaker_segments_test.json +27 -0
  9. ms-swift/silence_overlaps/700/train/overlap5s_transcriptions_train.json +0 -0
  10. ms-swift/silence_overlaps/700/train/silence_isoverlaps_train.json +0 -0
  11. ms-swift/silence_overlaps/silence_speaker_segments.json +0 -0
  12. ms-swift/silence_overlaps/test/.ipynb_checkpoints/test_train-checkpoint.json +963 -0
  13. ms-swift/swift/cli/__pycache__/main.cpython-310.pyc +0 -0
  14. ms-swift/swift/cli/_megatron/__init__.py +0 -0
  15. ms-swift/swift/cli/deploy.py +5 -0
  16. ms-swift/swift/cli/sample.py +5 -0
  17. ms-swift/swift/cli/sft.py +7 -0
  18. ms-swift/swift/cli/web_ui.py +5 -0
  19. ms-swift/swift/hub/__init__.py +1 -0
  20. ms-swift/swift/llm/__init__.py +85 -0
  21. ms-swift/swift/llm/__pycache__/base.cpython-310.pyc +0 -0
  22. ms-swift/swift/llm/app/build_ui.py +139 -0
  23. ms-swift/swift/llm/argument/__pycache__/app_args.cpython-310.pyc +0 -0
  24. ms-swift/swift/llm/argument/__pycache__/eval_args.cpython-310.pyc +0 -0
  25. ms-swift/swift/llm/argument/__pycache__/export_args.cpython-310.pyc +0 -0
  26. ms-swift/swift/llm/argument/__pycache__/rlhf_args.cpython-310.pyc +0 -0
  27. ms-swift/swift/llm/argument/__pycache__/sampling_args.cpython-310.pyc +0 -0
  28. ms-swift/swift/llm/argument/__pycache__/train_args.cpython-310.pyc +0 -0
  29. ms-swift/swift/llm/argument/__pycache__/tuner_args.cpython-310.pyc +0 -0
  30. ms-swift/swift/llm/argument/__pycache__/webui_args.cpython-310.pyc +0 -0
  31. ms-swift/swift/llm/argument/base_args/__pycache__/generation_args.cpython-310.pyc +0 -0
  32. ms-swift/swift/llm/argument/base_args/__pycache__/quant_args.cpython-310.pyc +0 -0
  33. ms-swift/swift/llm/argument/base_args/base_args.py +268 -0
  34. ms-swift/swift/llm/argument/base_args/model_args.py +178 -0
  35. ms-swift/swift/llm/argument/base_args/quant_args.py +91 -0
  36. ms-swift/swift/llm/argument/deploy_args.py +76 -0
  37. ms-swift/swift/llm/argument/export_args.py +107 -0
  38. ms-swift/swift/llm/argument/infer_args.py +179 -0
  39. ms-swift/swift/llm/argument/train_args.py +234 -0
  40. ms-swift/swift/llm/argument/tuner_args.py +222 -0
  41. ms-swift/swift/llm/dataset/__pycache__/__init__.cpython-310.pyc +0 -0
  42. ms-swift/swift/llm/dataset/dataset/mllm.py +1215 -0
  43. ms-swift/swift/llm/dataset/preprocessor/__pycache__/extra.cpython-310.pyc +0 -0
  44. ms-swift/swift/llm/dataset/preprocessor/core.py +529 -0
  45. ms-swift/swift/llm/ds_config/zero2_offload.json +35 -0
  46. ms-swift/swift/llm/export/merge_lora.py +44 -0
  47. ms-swift/swift/llm/export/quant.py +266 -0
  48. ms-swift/swift/llm/infer/__pycache__/__init__.cpython-310.pyc +0 -0
  49. ms-swift/swift/llm/infer/__pycache__/protocol.cpython-310.pyc +0 -0
  50. ms-swift/swift/llm/infer/infer_engine/__init__.py +35 -0
ms-swift/.ipynb_checkpoints/dataset_OSST-checkpoint.json ADDED
The diff for this file is too large to render. See raw diff
 
ms-swift/scripts/benchmark/exp_utils.py ADDED
@@ -0,0 +1,391 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import shutil
3
+ import subprocess
4
+ import time
5
+ from collections import deque
6
+ from copy import deepcopy
7
+ from dataclasses import asdict, dataclass, field
8
+ from typing import Any, Dict, List
9
+
10
+ import json
11
+ import torch
12
+
13
+ from swift.llm import ExportArguments
14
+ from swift.utils import find_free_port, get_device_count, get_logger
15
+
16
+ logger = get_logger()
17
+
18
+
19
+ @dataclass
20
+ class Experiment:
21
+
22
+ name: str
23
+
24
+ cmd: str
25
+
26
+ group: str
27
+
28
+ requirements: Dict = field(default_factory=dict)
29
+
30
+ eval_requirements: Dict = field(default_factory=dict)
31
+
32
+ eval_dataset: List = field(default_factory=list)
33
+
34
+ args: Dict = field(default_factory=dict)
35
+
36
+ env: Dict = field(default_factory=dict)
37
+
38
+ record: Dict = field(default_factory=dict)
39
+
40
+ create_time: float = None
41
+
42
+ runtime: Dict = field(default_factory=dict)
43
+
44
+ input_args: Any = None
45
+
46
+ do_eval = False
47
+
48
+ def __init__(self,
49
+ name,
50
+ cmd,
51
+ group,
52
+ requirements=None,
53
+ eval_requirements=None,
54
+ eval_dataset=None,
55
+ args=None,
56
+ input_args=None,
57
+ **kwargs):
58
+ self.name = name
59
+ self.cmd = cmd
60
+ self.group = group
61
+ self.requirements = requirements or {}
62
+ self.args = args or {}
63
+ self.record = {}
64
+ self.env = {}
65
+ self.runtime = {}
66
+ self.input_args = input_args
67
+ self.eval_requirements = eval_requirements or {}
68
+ self.eval_dataset = eval_dataset or []
69
+ if self.cmd == 'eval':
70
+ self.do_eval = True
71
+
72
+ def load(self, _json):
73
+ self.name = _json['name']
74
+ self.cmd = _json['cmd']
75
+ self.requirements = _json['requirements']
76
+ self.args = _json['args']
77
+ self.record = _json['record']
78
+ self.env = _json['env']
79
+ self.create_time = _json['create_time']
80
+
81
+ @property
82
+ def priority(self):
83
+ return self.requirements.get('gpu', 0)
84
+
85
+ def to_dict(self):
86
+ _dict = asdict(self)
87
+ _dict.pop('runtime')
88
+ _dict.pop('input_args')
89
+ return _dict
90
+
91
+
92
class ExpManager:
    """Schedules experiments as subprocesses, allocating free GPUs to each,
    polling for completion and persisting result records to disk."""

    RESULT_FILE = 'result.jsonl'

    def __init__(self):
        # Experiments whose subprocess is currently running.
        self.exps = []

    def assert_gpu_not_overlap(self):
        """Sanity check: no GPU may be assigned to two running experiments."""
        all_gpus = set()
        for exp in self.exps:
            gpus = exp.runtime['env']['CUDA_VISIBLE_DEVICES'].split(',')
            if all_gpus & set(gpus):
                raise ValueError(f'GPU overlap: {self.exps}!')
            all_gpus.update(gpus)

    def _launch(self, exp: Experiment, runtime: Dict, log_suffix: str):
        """Start the subprocess for ``exp`` and register it as running.

        Extracted from ``run``, which previously duplicated this launch code
        for the eval and the train/export paths.
        """
        envs = deepcopy(runtime.get('env', {}))
        envs.update(os.environ)
        logger.info(f'Running cmd: {runtime["running_cmd"]}, env: {runtime.get("env", {})}')
        os.makedirs('exp', exist_ok=True)
        log_file = os.path.join('exp', f'{exp.name}.{log_suffix}.log')
        exp.handler = subprocess.Popen(runtime['running_cmd'] + f' > {log_file} 2>&1', env=envs, shell=True)
        self.exps.append(exp)
        self.assert_gpu_not_overlap()

    def run(self, exp: Experiment):
        """Launch ``exp`` (or skip it when its record shows it already finished).

        Raises:
            AssertionError: when no free GPU is available or a parallel export
                is requested (the caller retries later).
            ValueError: on duplicate experiment names.
        """
        record_file = os.path.join(exp.input_args.save_dir, exp.name + '.json')
        if os.path.exists(record_file):
            with open(record_file, 'r', encoding='utf-8') as f:
                _json = json.load(f)
            if exp.eval_dataset and 'eval_result' not in _json['record']:
                # Training finished but eval result is missing: resume with
                # the eval phase only.
                if not exp.do_eval:
                    logger.info(f'Experiment {exp.name} need eval, load from file.')
                    exp.load(_json)
                    exp.do_eval = True
            else:
                # Fix: logger.warn is a deprecated alias of logger.warning.
                logger.warning(f'Experiment {exp.name} already done, skip')
                return

        if exp.do_eval:
            runtime = self._build_eval_cmd(exp)
            exp.runtime = runtime
            self._launch(exp, runtime, log_suffix='eval')
            return

        if any(exp.name == e.name for e in self.exps):
            raise ValueError(f'Why exp name duplicate? {exp.name}')
        elif exp.cmd == 'export' and any(e.cmd == 'export' for e in self.exps):
            # Export tasks are exclusive; only one may run at a time.
            raise AssertionError('Cannot run parallel export task.')
        else:
            exp.create_time = time.time()
            runtime = self._build_cmd(exp)
            exp.runtime = runtime
            self._launch(exp, runtime, log_suffix=exp.cmd)

    def _build_eval_cmd(self, exp: Experiment):
        """Compose the ``swift eval`` command line and its GPU allocation."""
        gpu = exp.eval_requirements.get('gpu', None)
        env = {}
        allocated = []
        if gpu:
            allocated = self._find_free_gpu(int(gpu))
            assert allocated, 'No free gpu for now!'
            allocated = [str(g) for g in allocated]
            env['CUDA_VISIBLE_DEVICES'] = ','.join(allocated)

        best_model_checkpoint = exp.record.get('best_model_checkpoint')
        eval_dataset = exp.eval_dataset
        if best_model_checkpoint is None:
            # Fix: previously fell through with ``cmd`` unbound, raising an
            # opaque UnboundLocalError at the return statement below.
            raise ValueError(f'Experiment {exp.name} has no best_model_checkpoint to evaluate.')
        if not os.path.exists(os.path.join(best_model_checkpoint, 'args.json')):
            cmd = f'swift eval --ckpt_dir {best_model_checkpoint} ' \
                + f'--infer_backend pt --train_type full --eval_dataset {" ".join(eval_dataset)}'
        else:
            cmd = f'swift eval --model {exp.args.get("model")} --infer_backend pt ' \
                f'--eval_dataset {" ".join(eval_dataset)}'

        return {
            'running_cmd': cmd,
            'gpu': allocated,
            'env': env,
        }

    def _build_cmd(self, exp: Experiment):
        """Compose the train/rlhf/export command line, GPU allocation and env."""
        gpu = exp.requirements.get('gpu', None)
        env = {}
        allocated = []
        if gpu:
            allocated = self._find_free_gpu(int(gpu))
            assert allocated, 'No free gpu for now!'
            allocated = [str(g) for g in allocated]
            env['CUDA_VISIBLE_DEVICES'] = ','.join(allocated)
        if int(exp.requirements.get('ddp', 1)) > 1:
            env['NPROC_PER_NODE'] = exp.requirements.get('ddp')
            env['MASTER_PORT'] = str(find_free_port())

        if exp.cmd == 'sft':
            from swift.llm import TrainArguments
            args = exp.args
            # Instantiate the argument class once to resolve the default
            # output/logging dirs, then pin them in the CLI args.
            sft_args = TrainArguments(**args)
            args['output_dir'] = sft_args.output_dir
            args['logging_dir'] = sft_args.logging_dir
            args['add_version'] = False
            os.makedirs(sft_args.output_dir, exist_ok=True)
            os.makedirs(sft_args.logging_dir, exist_ok=True)
            cmd = 'swift sft '
            for key, value in args.items():
                cmd += f' --{key} {value}'
        elif exp.cmd == 'rlhf':
            from swift.llm import RLHFArguments
            args = exp.args
            rlhf_args = RLHFArguments(**args)
            args['output_dir'] = rlhf_args.output_dir
            args['logging_dir'] = rlhf_args.logging_dir
            args['add_version'] = False
            os.makedirs(rlhf_args.output_dir, exist_ok=True)
            os.makedirs(rlhf_args.logging_dir, exist_ok=True)
            cmd = 'swift rlhf '
            for key, value in args.items():
                cmd += f' --{key} {value}'
        elif exp.cmd == 'export':
            args = exp.args
            cmd = 'swift export '
            for key, value in args.items():
                cmd += f' --{key} {value}'
        else:
            raise ValueError(f'Unsupported cmd type: {exp.cmd}')
        return {
            'running_cmd': cmd,
            'gpu': allocated,
            'env': env,
            'logging_dir': args.get('logging_dir'),
            'output_dir': args.get('output_dir', args.get('ckpt_dir'))
        }

    def _find_free_gpu(self, n):
        """Return ``n`` GPU indices not used by any running experiment, or None."""
        all_gpus = set()
        for exp in self.exps:
            all_gpus.update(exp.runtime.get('gpu', set()))
        all_gpus = {int(g) for g in all_gpus}
        free_gpu = set(range(get_device_count())) - all_gpus
        if len(free_gpu) < n:
            return None
        return list(free_gpu)[:n]

    def prepare_experiments(self, args: Any):
        """Parse all config files into Experiment objects.

        Per-experiment entries override the file-level ``args``/``requirements``/
        ``env`` defaults.
        """
        experiments = []
        for config_file in args.config:
            with open(config_file, 'r', encoding='utf-8') as f:
                # Group name is the config file name without the '.json' suffix.
                group = os.path.basename(config_file)
                group = group[:-5]
                content = json.load(f)
                exps = content['experiment']
                for exp in exps:
                    main_cfg = deepcopy(content)
                    name = exp['name']
                    cmd = main_cfg['cmd']
                    run_args = main_cfg['args']
                    env = main_cfg.get('env', {})
                    requirements = main_cfg.get('requirements', {})
                    eval_requirements = main_cfg.get('eval_requirements', {})
                    eval_dataset = main_cfg.get('eval_dataset', [])
                    if 'args' in exp:
                        run_args.update(exp['args'])
                    if 'requirements' in exp:
                        requirements.update(exp['requirements'])
                    if 'env' in exp:
                        env.update(exp['env'])
                    experiments.append(
                        Experiment(
                            group=group,
                            name=name,
                            cmd=cmd,
                            args=run_args,
                            env=env,
                            requirements=requirements,
                            eval_requirements=eval_requirements,
                            eval_dataset=eval_dataset,
                            input_args=args))
        return experiments

    @staticmethod
    def _get_metric(exp: Experiment):
        """Extract the result metric of a finished experiment, or None.

        - eval tasks: parse the 'Final report:' line from the eval log.
        - export tasks: move the produced model dir and record its path.
        - train tasks: read the last 'model_info' line from logging.jsonl.
        """
        if exp.do_eval:
            eval_log = os.path.join('exp', f'{exp.name}.eval.log')
            if os.path.isfile(eval_log):
                with open(eval_log, 'r', encoding='utf-8') as f:
                    for line in f:
                        if 'Final report:' in line:
                            # The report is a Python-repr dict; normalize quotes for json.
                            return json.loads(line.split('Final report:')[1].replace('\'', '"'))
        elif exp.cmd == 'export':
            exp_args = ExportArguments(**exp.args)
            if exp_args.quant_bits > 0:
                if exp_args.ckpt_dir is None:
                    path = f'{exp_args.model_type}-{exp_args.quant_method}-int{exp_args.quant_bits}'
                else:
                    ckpt_dir, ckpt_name = os.path.split(exp_args.ckpt_dir)
                    path = os.path.join(ckpt_dir, f'{ckpt_name}-{exp_args.quant_method}-int{exp_args.quant_bits}')
            else:
                ckpt_dir, ckpt_name = os.path.split(exp_args.ckpt_dir)
                path = os.path.join(ckpt_dir, f'{ckpt_name}-merged')
            if os.path.exists(path):
                # Move the exported model under a dir named after the experiment.
                shutil.rmtree(exp.name, ignore_errors=True)
                os.makedirs(exp.name, exist_ok=True)
                shutil.move(path, os.path.join(exp.name, path))
                return {
                    'best_model_checkpoint': os.path.join(exp.name, path),
                }
        else:
            logging_dir = exp.runtime.get('logging_dir')
            logging_file = os.path.join(logging_dir, '..', 'logging.jsonl')
            if os.path.isfile(logging_file):
                with open(logging_file, 'r', encoding='utf-8') as f:
                    for line in f:
                        if 'model_info' in line:
                            return json.loads(line)
        return None

    @staticmethod
    def write_record(exp: Experiment):
        """Persist the experiment record as <save_dir>/<name>.json."""
        target_dir = exp.input_args.save_dir
        file = os.path.join(target_dir, exp.name + '.json')
        with open(file, 'w', encoding='utf-8') as f:
            f.write(json.dumps(exp.to_dict()) + '\n')

    def _poll(self):
        """Block until at least one running experiment finishes, then harvest it."""
        while True:
            time.sleep(5)

            has_finished = False
            for exp in self.exps:
                rt = exp.handler.poll()
                if rt is None:
                    continue

                has_finished = True
                if rt == 0:
                    if not exp.do_eval:
                        all_metric = self._get_metric(exp)
                        if all_metric:
                            exp.record.update(all_metric)
                            if exp.eval_dataset:
                                # Requeue at the front so its eval runs next.
                                exp.do_eval = True
                                self.exp_queue.appendleft(exp)
                            self.write_record(exp)
                        else:
                            logger.error(f'Running {exp.name} task, but no result found')
                    else:
                        all_metric = self._get_metric(exp)
                        exp.record['eval_result'] = all_metric
                        if all_metric:
                            self.write_record(exp)
                        else:
                            logger.error(f'Running {exp.name} eval task, but no eval result found')
                logger.info(f'Running {exp.name} finished with return code: {rt}')

            if has_finished:
                # Keep only experiments whose process is still alive.
                self.exps = [exp for exp in self.exps if exp.handler.poll() is None]
                break

    def begin(self, args: Any):
        """Main scheduling loop: queue every experiment, launch as GPUs allow."""
        exps = self.prepare_experiments(args)
        logger.info(f'all exps: {exps}')
        exps.sort(key=lambda e: e.priority)
        self.exp_queue = deque()
        for exp in exps:
            self.exp_queue.append(exp)

        while len(self.exp_queue) or len(self.exps) > 0:
            while len(self.exp_queue):
                try:
                    logger.info(f'Running exp: {self.exp_queue[0].name}')
                    self.run(self.exp_queue[0])
                except Exception as e:
                    if not isinstance(e, AssertionError):
                        logger.error(f'Adding exp {self.exp_queue[0].name} error because of:')
                        logger.error(e)
                        self.exp_queue.popleft()
                    else:
                        # Fix: the message and exception were passed as two
                        # positional args, which logging treats as lazy
                        # %-format arguments and fails to render.
                        logger.info(f'Adding exp {self.exp_queue[0].name} error because of: {e}')
                        if 'no free gpu' in str(e).lower():
                            # Wait for a running experiment to free its GPUs.
                            break
                        else:
                            continue
                else:
                    self.exp_queue.popleft()
            self._poll()
        logger.info(f'Run task finished because of exp queue: {self.exp_queue} and exps: {self.exps}')
380
+
381
+
382
def find_all_config(dir_or_file: str):
    """Collect experiment config files.

    A file path is returned as a single-element list; a directory is walked
    recursively for ``.json`` files, skipping jupyter checkpoint dirs.
    """
    if os.path.isfile(dir_or_file):
        return [dir_or_file]
    found = []
    for root, _dirs, files in os.walk(dir_or_file):
        if 'ipynb' in root:
            continue
        found.extend(os.path.join(root, fname) for fname in files if fname.endswith('.json'))
    return found
ms-swift/scripts/utils/plot_loss.py ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
# Render the training-loss curve of a checkpoint from its tensorboard logs.
import os

from swift.utils import plot_images

# Edit this to the actual checkpoint directory before running.
ckpt_dir = 'output/xxx/vx-xxx'

if __name__ == '__main__':
    out_images_dir = os.path.join(ckpt_dir, 'images')
    tensorboard_dir = os.path.join(ckpt_dir, 'runs')
    # 0.9 is the smoothing factor applied to the plotted curve.
    plot_images(out_images_dir, tensorboard_dir, ['train/loss'], 0.9)
ms-swift/silence_overlaps/700/.ipynb_checkpoints/silence_isoverlap_train-checkpoint.json ADDED
@@ -0,0 +1,1152 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--1101857.wav",
4
+ "key": "SODA_PROCESSED--train--1101857",
5
+ "model_output": "No significant overlaps found."
6
+ },
7
+ {
8
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--508884.wav",
9
+ "key": "SODA_PROCESSED--train--508884",
10
+ "model_output": "No significant overlaps found."
11
+ },
12
+ {
13
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--1113674.wav",
14
+ "key": "SODA_PROCESSED--train--1113674",
15
+ "model_output": "No significant overlaps found."
16
+ },
17
+ {
18
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--158293.wav",
19
+ "key": "SODA_PROCESSED--train--158293",
20
+ "model_output": "No significant overlaps found."
21
+ },
22
+ {
23
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--631363.wav",
24
+ "key": "SODA_PROCESSED--train--631363",
25
+ "model_output": "No significant overlaps found."
26
+ },
27
+ {
28
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--277322.wav",
29
+ "key": "SODA_PROCESSED--train--277322",
30
+ "model_output": "No significant overlaps found."
31
+ },
32
+ {
33
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--1131940.wav",
34
+ "key": "SODA_PROCESSED--train--1131940",
35
+ "model_output": "No significant overlaps found."
36
+ },
37
+ {
38
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--1108753.wav",
39
+ "key": "SODA_PROCESSED--train--1108753",
40
+ "model_output": "No significant overlaps found."
41
+ },
42
+ {
43
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--27924.wav",
44
+ "key": "SODA_PROCESSED--train--27924",
45
+ "model_output": "No significant overlaps found."
46
+ },
47
+ {
48
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--374749.wav",
49
+ "key": "SODA_PROCESSED--train--374749",
50
+ "model_output": "No significant overlaps found."
51
+ },
52
+ {
53
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--821468.wav",
54
+ "key": "SODA_PROCESSED--train--821468",
55
+ "model_output": "No significant overlaps found."
56
+ },
57
+ {
58
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--416516.wav",
59
+ "key": "SODA_PROCESSED--train--416516",
60
+ "model_output": "No significant overlaps found."
61
+ },
62
+ {
63
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--1029082.wav",
64
+ "key": "SODA_PROCESSED--train--1029082",
65
+ "model_output": "No significant overlaps found."
66
+ },
67
+ {
68
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--992151.wav",
69
+ "key": "SODA_PROCESSED--train--992151",
70
+ "model_output": "No significant overlaps found."
71
+ },
72
+ {
73
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--744708.wav",
74
+ "key": "SODA_PROCESSED--train--744708",
75
+ "model_output": "No significant overlaps found."
76
+ },
77
+ {
78
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--11862.wav",
79
+ "key": "SODA_PROCESSED--train--11862",
80
+ "model_output": "No significant overlaps found."
81
+ },
82
+ {
83
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--419304.wav",
84
+ "key": "SODA_PROCESSED--train--419304",
85
+ "model_output": "No significant overlaps found."
86
+ },
87
+ {
88
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--98673.wav",
89
+ "key": "SODA_PROCESSED--train--98673",
90
+ "model_output": "No significant overlaps found."
91
+ },
92
+ {
93
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--22719.wav",
94
+ "key": "SODA_PROCESSED--train--22719",
95
+ "model_output": "No significant overlaps found."
96
+ },
97
+ {
98
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--1028263.wav",
99
+ "key": "SODA_PROCESSED--train--1028263",
100
+ "model_output": "No significant overlaps found."
101
+ },
102
+ {
103
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--848051.wav",
104
+ "key": "SODA_PROCESSED--train--848051",
105
+ "model_output": "No significant overlaps found."
106
+ },
107
+ {
108
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--511668.wav",
109
+ "key": "SODA_PROCESSED--train--511668",
110
+ "model_output": "No significant overlaps found."
111
+ },
112
+ {
113
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--12047.wav",
114
+ "key": "SODA_PROCESSED--train--12047",
115
+ "model_output": "No significant overlaps found."
116
+ },
117
+ {
118
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--153751.wav",
119
+ "key": "SODA_PROCESSED--train--153751",
120
+ "model_output": "No significant overlaps found."
121
+ },
122
+ {
123
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--795559.wav",
124
+ "key": "SODA_PROCESSED--train--795559",
125
+ "model_output": "No significant overlaps found."
126
+ },
127
+ {
128
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--387024.wav",
129
+ "key": "SODA_PROCESSED--train--387024",
130
+ "model_output": "No significant overlaps found."
131
+ },
132
+ {
133
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--1168213.wav",
134
+ "key": "SODA_PROCESSED--train--1168213",
135
+ "model_output": "No significant overlaps found."
136
+ },
137
+ {
138
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--1123711.wav",
139
+ "key": "SODA_PROCESSED--train--1123711",
140
+ "model_output": "No significant overlaps found."
141
+ },
142
+ {
143
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--819618.wav",
144
+ "key": "SODA_PROCESSED--train--819618",
145
+ "model_output": "No significant overlaps found."
146
+ },
147
+ {
148
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--752118.wav",
149
+ "key": "SODA_PROCESSED--train--752118",
150
+ "model_output": "No significant overlaps found."
151
+ },
152
+ {
153
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--581770.wav",
154
+ "key": "SODA_PROCESSED--train--581770",
155
+ "model_output": "No significant overlaps found."
156
+ },
157
+ {
158
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--276032.wav",
159
+ "key": "SODA_PROCESSED--train--276032",
160
+ "model_output": "No significant overlaps found."
161
+ },
162
+ {
163
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--556475.wav",
164
+ "key": "SODA_PROCESSED--train--556475",
165
+ "model_output": "No significant overlaps found."
166
+ },
167
+ {
168
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--674667.wav",
169
+ "key": "SODA_PROCESSED--train--674667",
170
+ "model_output": "No significant overlaps found."
171
+ },
172
+ {
173
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--501206.wav",
174
+ "key": "SODA_PROCESSED--train--501206",
175
+ "model_output": "No significant overlaps found."
176
+ },
177
+ {
178
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--905725.wav",
179
+ "key": "SODA_PROCESSED--train--905725",
180
+ "model_output": "No significant overlaps found."
181
+ },
182
+ {
183
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--265829.wav",
184
+ "key": "SODA_PROCESSED--train--265829",
185
+ "model_output": "No significant overlaps found."
186
+ },
187
+ {
188
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--207527.wav",
189
+ "key": "SODA_PROCESSED--train--207527",
190
+ "model_output": "No significant overlaps found."
191
+ },
192
+ {
193
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--985415.wav",
194
+ "key": "SODA_PROCESSED--train--985415",
195
+ "model_output": "No significant overlaps found."
196
+ },
197
+ {
198
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--115102.wav",
199
+ "key": "SODA_PROCESSED--train--115102",
200
+ "model_output": "No significant overlaps found."
201
+ },
202
+ {
203
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--8820.wav",
204
+ "key": "SODA_PROCESSED--train--8820",
205
+ "model_output": "No significant overlaps found."
206
+ },
207
+ {
208
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--453454.wav",
209
+ "key": "SODA_PROCESSED--train--453454",
210
+ "model_output": "No significant overlaps found."
211
+ },
212
+ {
213
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--375003.wav",
214
+ "key": "SODA_PROCESSED--train--375003",
215
+ "model_output": "No significant overlaps found."
216
+ },
217
+ {
218
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--757426.wav",
219
+ "key": "SODA_PROCESSED--train--757426",
220
+ "model_output": "No significant overlaps found."
221
+ },
222
+ {
223
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--202914.wav",
224
+ "key": "SODA_PROCESSED--train--202914",
225
+ "model_output": "No significant overlaps found."
226
+ },
227
+ {
228
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--1007416.wav",
229
+ "key": "SODA_PROCESSED--train--1007416",
230
+ "model_output": "No significant overlaps found."
231
+ },
232
+ {
233
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--193891.wav",
234
+ "key": "SODA_PROCESSED--train--193891",
235
+ "model_output": "No significant overlaps found."
236
+ },
237
+ {
238
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--96343.wav",
239
+ "key": "SODA_PROCESSED--train--96343",
240
+ "model_output": "No significant overlaps found."
241
+ },
242
+ {
243
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--1031234.wav",
244
+ "key": "SODA_PROCESSED--train--1031234",
245
+ "model_output": "No significant overlaps found."
246
+ },
247
+ {
248
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--798455.wav",
249
+ "key": "SODA_PROCESSED--train--798455",
250
+ "model_output": "No significant overlaps found."
251
+ },
252
+ {
253
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--438636.wav",
254
+ "key": "SODA_PROCESSED--train--438636",
255
+ "model_output": "No significant overlaps found."
256
+ },
257
+ {
258
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--776766.wav",
259
+ "key": "SODA_PROCESSED--train--776766",
260
+ "model_output": "No significant overlaps found."
261
+ },
262
+ {
263
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--691830.wav",
264
+ "key": "SODA_PROCESSED--train--691830",
265
+ "model_output": "No significant overlaps found."
266
+ },
267
+ {
268
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--524306.wav",
269
+ "key": "SODA_PROCESSED--train--524306",
270
+ "model_output": "No significant overlaps found."
271
+ },
272
+ {
273
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--471264.wav",
274
+ "key": "SODA_PROCESSED--train--471264",
275
+ "model_output": "No significant overlaps found."
276
+ },
277
+ {
278
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--421778.wav",
279
+ "key": "SODA_PROCESSED--train--421778",
280
+ "model_output": "No significant overlaps found."
281
+ },
282
+ {
283
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--541347.wav",
284
+ "key": "SODA_PROCESSED--train--541347",
285
+ "model_output": "No significant overlaps found."
286
+ },
287
+ {
288
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--1084325.wav",
289
+ "key": "SODA_PROCESSED--train--1084325",
290
+ "model_output": "No significant overlaps found."
291
+ },
292
+ {
293
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--29039.wav",
294
+ "key": "SODA_PROCESSED--train--29039",
295
+ "model_output": "No significant overlaps found."
296
+ },
297
+ {
298
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--1182464.wav",
299
+ "key": "SODA_PROCESSED--train--1182464",
300
+ "model_output": "No significant overlaps found."
301
+ },
302
+ {
303
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--735517.wav",
304
+ "key": "SODA_PROCESSED--train--735517",
305
+ "model_output": "No significant overlaps found."
306
+ },
307
+ {
308
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--417260.wav",
309
+ "key": "SODA_PROCESSED--train--417260",
310
+ "model_output": "No significant overlaps found."
311
+ },
312
+ {
313
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--784738.wav",
314
+ "key": "SODA_PROCESSED--train--784738",
315
+ "model_output": "No significant overlaps found."
316
+ },
317
+ {
318
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--303363.wav",
319
+ "key": "SODA_PROCESSED--train--303363",
320
+ "model_output": "No significant overlaps found."
321
+ },
322
+ {
323
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--795181.wav",
324
+ "key": "SODA_PROCESSED--train--795181",
325
+ "model_output": "No significant overlaps found."
326
+ },
327
+ {
328
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--33760.wav",
329
+ "key": "SODA_PROCESSED--train--33760",
330
+ "model_output": "No significant overlaps found."
331
+ },
332
+ {
333
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--126878.wav",
334
+ "key": "SODA_PROCESSED--train--126878",
335
+ "model_output": "No significant overlaps found."
336
+ },
337
+ {
338
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--317167.wav",
339
+ "key": "SODA_PROCESSED--train--317167",
340
+ "model_output": "No significant overlaps found."
341
+ },
342
+ {
343
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--463322.wav",
344
+ "key": "SODA_PROCESSED--train--463322",
345
+ "model_output": "No significant overlaps found."
346
+ },
347
+ {
348
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--51285.wav",
349
+ "key": "SODA_PROCESSED--train--51285",
350
+ "model_output": "No significant overlaps found."
351
+ },
352
+ {
353
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--1081079.wav",
354
+ "key": "SODA_PROCESSED--train--1081079",
355
+ "model_output": "No significant overlaps found."
356
+ },
357
+ {
358
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--58199.wav",
359
+ "key": "SODA_PROCESSED--train--58199",
360
+ "model_output": "No significant overlaps found."
361
+ },
362
+ {
363
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--1017701.wav",
364
+ "key": "SODA_PROCESSED--train--1017701",
365
+ "model_output": "No significant overlaps found."
366
+ },
367
+ {
368
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--762267.wav",
369
+ "key": "SODA_PROCESSED--train--762267",
370
+ "model_output": "No significant overlaps found."
371
+ },
372
+ {
373
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--4948.wav",
374
+ "key": "SODA_PROCESSED--train--4948",
375
+ "model_output": "No significant overlaps found."
376
+ },
377
+ {
378
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--737676.wav",
379
+ "key": "SODA_PROCESSED--train--737676",
380
+ "model_output": "No significant overlaps found."
381
+ },
382
+ {
383
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--606362.wav",
384
+ "key": "SODA_PROCESSED--train--606362",
385
+ "model_output": "No significant overlaps found."
386
+ },
387
+ {
388
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--674832.wav",
389
+ "key": "SODA_PROCESSED--train--674832",
390
+ "model_output": "No significant overlaps found."
391
+ },
392
+ {
393
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--588465.wav",
394
+ "key": "SODA_PROCESSED--train--588465",
395
+ "model_output": "No significant overlaps found."
396
+ },
397
+ {
398
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--386163.wav",
399
+ "key": "SODA_PROCESSED--train--386163",
400
+ "model_output": "No significant overlaps found."
401
+ },
402
+ {
403
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--421624.wav",
404
+ "key": "SODA_PROCESSED--train--421624",
405
+ "model_output": "No significant overlaps found."
406
+ },
407
+ {
408
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--977126.wav",
409
+ "key": "SODA_PROCESSED--train--977126",
410
+ "model_output": "No significant overlaps found."
411
+ },
412
+ {
413
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--932676.wav",
414
+ "key": "SODA_PROCESSED--train--932676",
415
+ "model_output": "No significant overlaps found."
416
+ },
417
+ {
418
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--315768.wav",
419
+ "key": "SODA_PROCESSED--train--315768",
420
+ "model_output": "No significant overlaps found."
421
+ },
422
+ {
423
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--939669.wav",
424
+ "key": "SODA_PROCESSED--train--939669",
425
+ "model_output": "No significant overlaps found."
426
+ },
427
+ {
428
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--1174912.wav",
429
+ "key": "SODA_PROCESSED--train--1174912",
430
+ "model_output": "No significant overlaps found."
431
+ },
432
+ {
433
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--1023331.wav",
434
+ "key": "SODA_PROCESSED--train--1023331",
435
+ "model_output": "No significant overlaps found."
436
+ },
437
+ {
438
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--144310.wav",
439
+ "key": "SODA_PROCESSED--train--144310",
440
+ "model_output": "No significant overlaps found."
441
+ },
442
+ {
443
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--1011922.wav",
444
+ "key": "SODA_PROCESSED--train--1011922",
445
+ "model_output": "No significant overlaps found."
446
+ },
447
+ {
448
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--713730.wav",
449
+ "key": "SODA_PROCESSED--train--713730",
450
+ "model_output": "No significant overlaps found."
451
+ },
452
+ {
453
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--708040.wav",
454
+ "key": "SODA_PROCESSED--train--708040",
455
+ "model_output": "No significant overlaps found."
456
+ },
457
+ {
458
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--860576.wav",
459
+ "key": "SODA_PROCESSED--train--860576",
460
+ "model_output": "No significant overlaps found."
461
+ },
462
+ {
463
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--1001007.wav",
464
+ "key": "SODA_PROCESSED--train--1001007",
465
+ "model_output": "No significant overlaps found."
466
+ },
467
+ {
468
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--1166623.wav",
469
+ "key": "SODA_PROCESSED--train--1166623",
470
+ "model_output": "No significant overlaps found."
471
+ },
472
+ {
473
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--372789.wav",
474
+ "key": "SODA_PROCESSED--train--372789",
475
+ "model_output": "No significant overlaps found."
476
+ },
477
+ {
478
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--468603.wav",
479
+ "key": "SODA_PROCESSED--train--468603",
480
+ "model_output": "No significant overlaps found."
481
+ },
482
+ {
483
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--233562.wav",
484
+ "key": "SODA_PROCESSED--train--233562",
485
+ "model_output": "No significant overlaps found."
486
+ },
487
+ {
488
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--476626.wav",
489
+ "key": "SODA_PROCESSED--train--476626",
490
+ "model_output": "No significant overlaps found."
491
+ },
492
+ {
493
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--49462.wav",
494
+ "key": "SODA_PROCESSED--train--49462",
495
+ "model_output": "No significant overlaps found."
496
+ },
497
+ {
498
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--303336.wav",
499
+ "key": "SODA_PROCESSED--train--303336",
500
+ "model_output": "No significant overlaps found."
501
+ },
502
+ {
503
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--221358.wav",
504
+ "key": "SODA_PROCESSED--train--221358",
505
+ "model_output": "No significant overlaps found."
506
+ },
507
+ {
508
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--843615.wav",
509
+ "key": "SODA_PROCESSED--train--843615",
510
+ "model_output": "No significant overlaps found."
511
+ },
512
+ {
513
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--873625.wav",
514
+ "key": "SODA_PROCESSED--train--873625",
515
+ "model_output": "No significant overlaps found."
516
+ },
517
+ {
518
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--4814.wav",
519
+ "key": "SODA_PROCESSED--train--4814",
520
+ "model_output": "No significant overlaps found."
521
+ },
522
+ {
523
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--280675.wav",
524
+ "key": "SODA_PROCESSED--train--280675",
525
+ "model_output": "No significant overlaps found."
526
+ },
527
+ {
528
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--1132437.wav",
529
+ "key": "SODA_PROCESSED--train--1132437",
530
+ "model_output": "No significant overlaps found."
531
+ },
532
+ {
533
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--650705.wav",
534
+ "key": "SODA_PROCESSED--train--650705",
535
+ "model_output": "No significant overlaps found."
536
+ },
537
+ {
538
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--1186756.wav",
539
+ "key": "SODA_PROCESSED--train--1186756",
540
+ "model_output": "No significant overlaps found."
541
+ },
542
+ {
543
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--108309.wav",
544
+ "key": "SODA_PROCESSED--train--108309",
545
+ "model_output": "No significant overlaps found."
546
+ },
547
+ {
548
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--82238.wav",
549
+ "key": "SODA_PROCESSED--train--82238",
550
+ "model_output": "No significant overlaps found."
551
+ },
552
+ {
553
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--616846.wav",
554
+ "key": "SODA_PROCESSED--train--616846",
555
+ "model_output": "No significant overlaps found."
556
+ },
557
+ {
558
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--61606.wav",
559
+ "key": "SODA_PROCESSED--train--61606",
560
+ "model_output": "No significant overlaps found."
561
+ },
562
+ {
563
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--370577.wav",
564
+ "key": "SODA_PROCESSED--train--370577",
565
+ "model_output": "No significant overlaps found."
566
+ },
567
+ {
568
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--69581.wav",
569
+ "key": "SODA_PROCESSED--train--69581",
570
+ "model_output": "No significant overlaps found."
571
+ },
572
+ {
573
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--180962.wav",
574
+ "key": "SODA_PROCESSED--train--180962",
575
+ "model_output": "No significant overlaps found."
576
+ },
577
+ {
578
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--578986.wav",
579
+ "key": "SODA_PROCESSED--train--578986",
580
+ "model_output": "No significant overlaps found."
581
+ },
582
+ {
583
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--132857.wav",
584
+ "key": "SODA_PROCESSED--train--132857",
585
+ "model_output": "No significant overlaps found."
586
+ },
587
+ {
588
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--188417.wav",
589
+ "key": "SODA_PROCESSED--train--188417",
590
+ "model_output": "No significant overlaps found."
591
+ },
592
+ {
593
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--771154.wav",
594
+ "key": "SODA_PROCESSED--train--771154",
595
+ "model_output": "No significant overlaps found."
596
+ },
597
+ {
598
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--720445.wav",
599
+ "key": "SODA_PROCESSED--train--720445",
600
+ "model_output": "No significant overlaps found."
601
+ },
602
+ {
603
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--514225.wav",
604
+ "key": "SODA_PROCESSED--train--514225",
605
+ "model_output": "No significant overlaps found."
606
+ },
607
+ {
608
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--815822.wav",
609
+ "key": "SODA_PROCESSED--train--815822",
610
+ "model_output": "No significant overlaps found."
611
+ },
612
+ {
613
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--761001.wav",
614
+ "key": "SODA_PROCESSED--train--761001",
615
+ "model_output": "No significant overlaps found."
616
+ },
617
+ {
618
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--1061857.wav",
619
+ "key": "SODA_PROCESSED--train--1061857",
620
+ "model_output": "No significant overlaps found."
621
+ },
622
+ {
623
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--475793.wav",
624
+ "key": "SODA_PROCESSED--train--475793",
625
+ "model_output": "No significant overlaps found."
626
+ },
627
+ {
628
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--406352.wav",
629
+ "key": "SODA_PROCESSED--train--406352",
630
+ "model_output": "No significant overlaps found."
631
+ },
632
+ {
633
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--486716.wav",
634
+ "key": "SODA_PROCESSED--train--486716",
635
+ "model_output": "No significant overlaps found."
636
+ },
637
+ {
638
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--468879.wav",
639
+ "key": "SODA_PROCESSED--train--468879",
640
+ "model_output": "No significant overlaps found."
641
+ },
642
+ {
643
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--338832.wav",
644
+ "key": "SODA_PROCESSED--train--338832",
645
+ "model_output": "No significant overlaps found."
646
+ },
647
+ {
648
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--845126.wav",
649
+ "key": "SODA_PROCESSED--train--845126",
650
+ "model_output": "No significant overlaps found."
651
+ },
652
+ {
653
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--490986.wav",
654
+ "key": "SODA_PROCESSED--train--490986",
655
+ "model_output": "No significant overlaps found."
656
+ },
657
+ {
658
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--1128813.wav",
659
+ "key": "SODA_PROCESSED--train--1128813",
660
+ "model_output": "No significant overlaps found."
661
+ },
662
+ {
663
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--193134.wav",
664
+ "key": "SODA_PROCESSED--train--193134",
665
+ "model_output": "No significant overlaps found."
666
+ },
667
+ {
668
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--258235.wav",
669
+ "key": "SODA_PROCESSED--train--258235",
670
+ "model_output": "No significant overlaps found."
671
+ },
672
+ {
673
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--895260.wav",
674
+ "key": "SODA_PROCESSED--train--895260",
675
+ "model_output": "No significant overlaps found."
676
+ },
677
+ {
678
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--119322.wav",
679
+ "key": "SODA_PROCESSED--train--119322",
680
+ "model_output": "No significant overlaps found."
681
+ },
682
+ {
683
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--413405.wav",
684
+ "key": "SODA_PROCESSED--train--413405",
685
+ "model_output": "No significant overlaps found."
686
+ },
687
+ {
688
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--346041.wav",
689
+ "key": "SODA_PROCESSED--train--346041",
690
+ "model_output": "No significant overlaps found."
691
+ },
692
+ {
693
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--718092.wav",
694
+ "key": "SODA_PROCESSED--train--718092",
695
+ "model_output": "No significant overlaps found."
696
+ },
697
+ {
698
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--218634.wav",
699
+ "key": "SODA_PROCESSED--train--218634",
700
+ "model_output": "No significant overlaps found."
701
+ },
702
+ {
703
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--835488.wav",
704
+ "key": "SODA_PROCESSED--train--835488",
705
+ "model_output": "No significant overlaps found."
706
+ },
707
+ {
708
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--113543.wav",
709
+ "key": "SODA_PROCESSED--train--113543",
710
+ "model_output": "No significant overlaps found."
711
+ },
712
+ {
713
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--869455.wav",
714
+ "key": "SODA_PROCESSED--train--869455",
715
+ "model_output": "No significant overlaps found."
716
+ },
717
+ {
718
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--330048.wav",
719
+ "key": "SODA_PROCESSED--train--330048",
720
+ "model_output": "No significant overlaps found."
721
+ },
722
+ {
723
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--766234.wav",
724
+ "key": "SODA_PROCESSED--train--766234",
725
+ "model_output": "No significant overlaps found."
726
+ },
727
+ {
728
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--535368.wav",
729
+ "key": "SODA_PROCESSED--train--535368",
730
+ "model_output": "No significant overlaps found."
731
+ },
732
+ {
733
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--908444.wav",
734
+ "key": "SODA_PROCESSED--train--908444",
735
+ "model_output": "No significant overlaps found."
736
+ },
737
+ {
738
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--748910.wav",
739
+ "key": "SODA_PROCESSED--train--748910",
740
+ "model_output": "No significant overlaps found."
741
+ },
742
+ {
743
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--525710.wav",
744
+ "key": "SODA_PROCESSED--train--525710",
745
+ "model_output": "No significant overlaps found."
746
+ },
747
+ {
748
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--399572.wav",
749
+ "key": "SODA_PROCESSED--train--399572",
750
+ "model_output": "No significant overlaps found."
751
+ },
752
+ {
753
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--737726.wav",
754
+ "key": "SODA_PROCESSED--train--737726",
755
+ "model_output": "No significant overlaps found."
756
+ },
757
+ {
758
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--44625.wav",
759
+ "key": "SODA_PROCESSED--train--44625",
760
+ "model_output": "No significant overlaps found."
761
+ },
762
+ {
763
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--1095086.wav",
764
+ "key": "SODA_PROCESSED--train--1095086",
765
+ "model_output": "No significant overlaps found."
766
+ },
767
+ {
768
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--269886.wav",
769
+ "key": "SODA_PROCESSED--train--269886",
770
+ "model_output": "No significant overlaps found."
771
+ },
772
+ {
773
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--596068.wav",
774
+ "key": "SODA_PROCESSED--train--596068",
775
+ "model_output": "No significant overlaps found."
776
+ },
777
+ {
778
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--16779.wav",
779
+ "key": "SODA_PROCESSED--train--16779",
780
+ "model_output": "No significant overlaps found."
781
+ },
782
+ {
783
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--532510.wav",
784
+ "key": "SODA_PROCESSED--train--532510",
785
+ "model_output": "No significant overlaps found."
786
+ },
787
+ {
788
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--68508.wav",
789
+ "key": "SODA_PROCESSED--train--68508",
790
+ "model_output": "No significant overlaps found."
791
+ },
792
+ {
793
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--162106.wav",
794
+ "key": "SODA_PROCESSED--train--162106",
795
+ "model_output": "No significant overlaps found."
796
+ },
797
+ {
798
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--831005.wav",
799
+ "key": "SODA_PROCESSED--train--831005",
800
+ "model_output": "No significant overlaps found."
801
+ },
802
+ {
803
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--509788.wav",
804
+ "key": "SODA_PROCESSED--train--509788",
805
+ "model_output": "No significant overlaps found."
806
+ },
807
+ {
808
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--489519.wav",
809
+ "key": "SODA_PROCESSED--train--489519",
810
+ "model_output": "No significant overlaps found."
811
+ },
812
+ {
813
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--1020087.wav",
814
+ "key": "SODA_PROCESSED--train--1020087",
815
+ "model_output": "No significant overlaps found."
816
+ },
817
+ {
818
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--1050427.wav",
819
+ "key": "SODA_PROCESSED--train--1050427",
820
+ "model_output": "No significant overlaps found."
821
+ },
822
+ {
823
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--842885.wav",
824
+ "key": "SODA_PROCESSED--train--842885",
825
+ "model_output": "No significant overlaps found."
826
+ },
827
+ {
828
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--166191.wav",
829
+ "key": "SODA_PROCESSED--train--166191",
830
+ "model_output": "No significant overlaps found."
831
+ },
832
+ {
833
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--826028.wav",
834
+ "key": "SODA_PROCESSED--train--826028",
835
+ "model_output": "No significant overlaps found."
836
+ },
837
+ {
838
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--715956.wav",
839
+ "key": "SODA_PROCESSED--train--715956",
840
+ "model_output": "No significant overlaps found."
841
+ },
842
+ {
843
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--967872.wav",
844
+ "key": "SODA_PROCESSED--train--967872",
845
+ "model_output": "No significant overlaps found."
846
+ },
847
+ {
848
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--277060.wav",
849
+ "key": "SODA_PROCESSED--train--277060",
850
+ "model_output": "No significant overlaps found."
851
+ },
852
+ {
853
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--803822.wav",
854
+ "key": "SODA_PROCESSED--train--803822",
855
+ "model_output": "No significant overlaps found."
856
+ },
857
+ {
858
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--928982.wav",
859
+ "key": "SODA_PROCESSED--train--928982",
860
+ "model_output": "No significant overlaps found."
861
+ },
862
+ {
863
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--371354.wav",
864
+ "key": "SODA_PROCESSED--train--371354",
865
+ "model_output": "No significant overlaps found."
866
+ },
867
+ {
868
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--12295.wav",
869
+ "key": "SODA_PROCESSED--train--12295",
870
+ "model_output": "No significant overlaps found."
871
+ },
872
+ {
873
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--1030451.wav",
874
+ "key": "SODA_PROCESSED--train--1030451",
875
+ "model_output": "No significant overlaps found."
876
+ },
877
+ {
878
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--168398.wav",
879
+ "key": "SODA_PROCESSED--train--168398",
880
+ "model_output": "No significant overlaps found."
881
+ },
882
+ {
883
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--556505.wav",
884
+ "key": "SODA_PROCESSED--train--556505",
885
+ "model_output": "No significant overlaps found."
886
+ },
887
+ {
888
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--123906.wav",
889
+ "key": "SODA_PROCESSED--train--123906",
890
+ "model_output": "No significant overlaps found."
891
+ },
892
+ {
893
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--1120331.wav",
894
+ "key": "SODA_PROCESSED--train--1120331",
895
+ "model_output": "No significant overlaps found."
896
+ },
897
+ {
898
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--121129.wav",
899
+ "key": "SODA_PROCESSED--train--121129",
900
+ "model_output": "No significant overlaps found."
901
+ },
902
+ {
903
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--690063.wav",
904
+ "key": "SODA_PROCESSED--train--690063",
905
+ "model_output": "No significant overlaps found."
906
+ },
907
+ {
908
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--334902.wav",
909
+ "key": "SODA_PROCESSED--train--334902",
910
+ "model_output": "No significant overlaps found."
911
+ },
912
+ {
913
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--442672.wav",
914
+ "key": "SODA_PROCESSED--train--442672",
915
+ "model_output": "No significant overlaps found."
916
+ },
917
+ {
918
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--640494.wav",
919
+ "key": "SODA_PROCESSED--train--640494",
920
+ "model_output": "No significant overlaps found."
921
+ },
922
+ {
923
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--171463.wav",
924
+ "key": "SODA_PROCESSED--train--171463",
925
+ "model_output": "No significant overlaps found."
926
+ },
927
+ {
928
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--565809.wav",
929
+ "key": "SODA_PROCESSED--train--565809",
930
+ "model_output": "No significant overlaps found."
931
+ },
932
+ {
933
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--329396.wav",
934
+ "key": "SODA_PROCESSED--train--329396",
935
+ "model_output": "No significant overlaps found."
936
+ },
937
+ {
938
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--1090942.wav",
939
+ "key": "SODA_PROCESSED--train--1090942",
940
+ "model_output": "No significant overlaps found."
941
+ },
942
+ {
943
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--980776.wav",
944
+ "key": "SODA_PROCESSED--train--980776",
945
+ "model_output": "No significant overlaps found."
946
+ },
947
+ {
948
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--29858.wav",
949
+ "key": "SODA_PROCESSED--train--29858",
950
+ "model_output": "No significant overlaps found."
951
+ },
952
+ {
953
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--596349.wav",
954
+ "key": "SODA_PROCESSED--train--596349",
955
+ "model_output": "No significant overlaps found."
956
+ },
957
+ {
958
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--604536.wav",
959
+ "key": "SODA_PROCESSED--train--604536",
960
+ "model_output": "No significant overlaps found."
961
+ },
962
+ {
963
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--500115.wav",
964
+ "key": "SODA_PROCESSED--train--500115",
965
+ "model_output": "No significant overlaps found."
966
+ },
967
+ {
968
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--605295.wav",
969
+ "key": "SODA_PROCESSED--train--605295",
970
+ "model_output": "No significant overlaps found."
971
+ },
972
+ {
973
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--310941.wav",
974
+ "key": "SODA_PROCESSED--train--310941",
975
+ "model_output": "No significant overlaps found."
976
+ },
977
+ {
978
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--1051089.wav",
979
+ "key": "SODA_PROCESSED--train--1051089",
980
+ "model_output": "No significant overlaps found."
981
+ },
982
+ {
983
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--187351.wav",
984
+ "key": "SODA_PROCESSED--train--187351",
985
+ "model_output": "No significant overlaps found."
986
+ },
987
+ {
988
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--645254.wav",
989
+ "key": "SODA_PROCESSED--train--645254",
990
+ "model_output": "No significant overlaps found."
991
+ },
992
+ {
993
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--1066203.wav",
994
+ "key": "SODA_PROCESSED--train--1066203",
995
+ "model_output": "No significant overlaps found."
996
+ },
997
+ {
998
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--913166.wav",
999
+ "key": "SODA_PROCESSED--train--913166",
1000
+ "model_output": "No significant overlaps found."
1001
+ },
1002
+ {
1003
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--583204.wav",
1004
+ "key": "SODA_PROCESSED--train--583204",
1005
+ "model_output": "No significant overlaps found."
1006
+ },
1007
+ {
1008
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--575640.wav",
1009
+ "key": "SODA_PROCESSED--train--575640",
1010
+ "model_output": "No significant overlaps found."
1011
+ },
1012
+ {
1013
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--109428.wav",
1014
+ "key": "SODA_PROCESSED--train--109428",
1015
+ "model_output": "No significant overlaps found."
1016
+ },
1017
+ {
1018
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--246434.wav",
1019
+ "key": "SODA_PROCESSED--train--246434",
1020
+ "model_output": "No significant overlaps found."
1021
+ },
1022
+ {
1023
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--977434.wav",
1024
+ "key": "SODA_PROCESSED--train--977434",
1025
+ "model_output": "No significant overlaps found."
1026
+ },
1027
+ {
1028
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--665430.wav",
1029
+ "key": "SODA_PROCESSED--train--665430",
1030
+ "model_output": "No significant overlaps found."
1031
+ },
1032
+ {
1033
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--960193.wav",
1034
+ "key": "SODA_PROCESSED--train--960193",
1035
+ "model_output": "No significant overlaps found."
1036
+ },
1037
+ {
1038
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--31287.wav",
1039
+ "key": "SODA_PROCESSED--train--31287",
1040
+ "model_output": "No significant overlaps found."
1041
+ },
1042
+ {
1043
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--254497.wav",
1044
+ "key": "SODA_PROCESSED--train--254497",
1045
+ "model_output": "No significant overlaps found."
1046
+ },
1047
+ {
1048
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--774546.wav",
1049
+ "key": "SODA_PROCESSED--train--774546",
1050
+ "model_output": "No significant overlaps found."
1051
+ },
1052
+ {
1053
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--273875.wav",
1054
+ "key": "SODA_PROCESSED--train--273875",
1055
+ "model_output": "No significant overlaps found."
1056
+ },
1057
+ {
1058
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--822773.wav",
1059
+ "key": "SODA_PROCESSED--train--822773",
1060
+ "model_output": "No significant overlaps found."
1061
+ },
1062
+ {
1063
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--1052554.wav",
1064
+ "key": "SODA_PROCESSED--train--1052554",
1065
+ "model_output": "No significant overlaps found."
1066
+ },
1067
+ {
1068
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--179972.wav",
1069
+ "key": "SODA_PROCESSED--train--179972",
1070
+ "model_output": "No significant overlaps found."
1071
+ },
1072
+ {
1073
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--1117467.wav",
1074
+ "key": "SODA_PROCESSED--train--1117467",
1075
+ "model_output": "No significant overlaps found."
1076
+ },
1077
+ {
1078
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--961025.wav",
1079
+ "key": "SODA_PROCESSED--train--961025",
1080
+ "model_output": "No significant overlaps found."
1081
+ },
1082
+ {
1083
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--923496.wav",
1084
+ "key": "SODA_PROCESSED--train--923496",
1085
+ "model_output": "No significant overlaps found."
1086
+ },
1087
+ {
1088
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--41171.wav",
1089
+ "key": "SODA_PROCESSED--train--41171",
1090
+ "model_output": "No significant overlaps found."
1091
+ },
1092
+ {
1093
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--679971.wav",
1094
+ "key": "SODA_PROCESSED--train--679971",
1095
+ "model_output": "No significant overlaps found."
1096
+ },
1097
+ {
1098
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--876910.wav",
1099
+ "key": "SODA_PROCESSED--train--876910",
1100
+ "model_output": "No significant overlaps found."
1101
+ },
1102
+ {
1103
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--617278.wav",
1104
+ "key": "SODA_PROCESSED--train--617278",
1105
+ "model_output": "No significant overlaps found."
1106
+ },
1107
+ {
1108
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--463700.wav",
1109
+ "key": "SODA_PROCESSED--train--463700",
1110
+ "model_output": "No significant overlaps found."
1111
+ },
1112
+ {
1113
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--1186623.wav",
1114
+ "key": "SODA_PROCESSED--train--1186623",
1115
+ "model_output": "No significant overlaps found."
1116
+ },
1117
+ {
1118
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--1076109.wav",
1119
+ "key": "SODA_PROCESSED--train--1076109",
1120
+ "model_output": "No significant overlaps found."
1121
+ },
1122
+ {
1123
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--414445.wav",
1124
+ "key": "SODA_PROCESSED--train--414445",
1125
+ "model_output": "No significant overlaps found."
1126
+ },
1127
+ {
1128
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--546350.wav",
1129
+ "key": "SODA_PROCESSED--train--546350",
1130
+ "model_output": "No significant overlaps found."
1131
+ },
1132
+ {
1133
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--1144076.wav",
1134
+ "key": "SODA_PROCESSED--train--1144076",
1135
+ "model_output": "No significant overlaps found."
1136
+ },
1137
+ {
1138
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--104948.wav",
1139
+ "key": "SODA_PROCESSED--train--104948",
1140
+ "model_output": "No significant overlaps found."
1141
+ },
1142
+ {
1143
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--836740.wav",
1144
+ "key": "SODA_PROCESSED--train--836740",
1145
+ "model_output": "No significant overlaps found."
1146
+ },
1147
+ {
1148
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--437951.wav",
1149
+ "key": "SODA_PROCESSED--train--437951",
1150
+ "model_output": "No significant overlaps found."
1151
+ }
1152
+ ]
ms-swift/silence_overlaps/700/original/silence_isoverlaps.json ADDED
The diff for this file is too large to render. See raw diff
 
ms-swift/silence_overlaps/700/test/.ipynb_checkpoints/overlap5s_silence_segments_test-checkpoint.json ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "key": "SODA_PROCESSED--train--137471",
4
+ "audio_url": "/root/autodl-tmp/output_overlapslong/newoverlapjson/overlap5s700/SODA_PROCESSED--train--137471.wav",
5
+ "model_output": "No, there is no silence gap."
6
+ },
7
+ {
8
+ "key": "SODA_PROCESSED--train--201044",
9
+ "audio_url": "/root/autodl-tmp/output_overlapslong/newoverlapjson/overlap5s700/SODA_PROCESSED--train--201044.wav",
10
+ "model_output": "No, there is no silence gap."
11
+ },
12
+ {
13
+ "key": "SODA_PROCESSED--train--596349",
14
+ "audio_url": "/root/autodl-tmp/output_overlapslong/newoverlapjson/overlap5s700/SODA_PROCESSED--train--596349.wav",
15
+ "model_output": "No, there is no silence gap."
16
+ },
17
+ {
18
+ "key": "SODA_PROCESSED--train--956648",
19
+ "audio_url": "/root/autodl-tmp/output_overlapslong/newoverlapjson/overlap5s700/SODA_PROCESSED--train--956648.wav",
20
+ "model_output": "No, there is no silence gap."
21
+ },
22
+ {
23
+ "key": "SODA_PROCESSED--train--962210",
24
+ "audio_url": "/root/autodl-tmp/output_overlapslong/newoverlapjson/overlap5s700/SODA_PROCESSED--train--962210.wav",
25
+ "model_output": "No, there is no silence gap."
26
+ }
27
+ ]
ms-swift/silence_overlaps/700/test/overlap5s_speaker_segments_test.json ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "key": "SODA_PROCESSED--train--254497",
4
+ "audio_url": "/root/autodl-tmp/output_overlapslong/newoverlapjson/overlap5s700/SODA_PROCESSED--train--254497.wav",
5
+ "model_output": "Speaker A: 00:00-00:03, 00:06-00:14, 00:23-00:26, 00:31-00:35\nSpeaker B: 00:03-00:15, 00:16-00:22, 00:27-00:30"
6
+ },
7
+ {
8
+ "key": "SODA_PROCESSED--train--1185164",
9
+ "audio_url": "/root/autodl-tmp/output_overlapslong/newoverlapjson/overlap5s700/SODA_PROCESSED--train--1185164.wav",
10
+ "model_output": "Speaker A: 00:00-00:01, 00:04-00:20, 00:20-00:33, 00:41-00:46, 00:52-00:54\nSpeaker B: 00:02-00:03, 00:09-00:15, 00:33-00:40, 00:47-00:52"
11
+ },
12
+ {
13
+ "key": "SODA_PROCESSED--train--205413",
14
+ "audio_url": "/root/autodl-tmp/output_overlapslong/newoverlapjson/overlap5s700/SODA_PROCESSED--train--205413.wav",
15
+ "model_output": "Speaker A: 00:00-00:01, 00:03-00:05, 00:15-00:18, 00:28-00:31, 00:36-00:40\nSpeaker B: 00:02-00:09, 00:09-00:15, 00:19-00:28, 00:32-00:36, 00:40-00:41"
16
+ },
17
+ {
18
+ "key": "SODA_PROCESSED--train--585968",
19
+ "audio_url": "/root/autodl-tmp/output_overlapslong/newoverlapjson/overlap5s700/SODA_PROCESSED--train--585968.wav",
20
+ "model_output": "Speaker A: 00:00-00:15, 00:15-00:23, 00:29-00:33, 00:36-00:43, 00:51-00:57, 01:05-01:08, 01:13-01:16\nSpeaker B: 00:06-00:14, 00:24-00:28, 00:33-00:36, 00:43-00:51, 00:57-01:04, 01:09-01:12, 01:17-01:18"
21
+ },
22
+ {
23
+ "key": "SODA_PROCESSED--train--1079940",
24
+ "audio_url": "/root/autodl-tmp/output_overlapslong/newoverlapjson/overlap5s700/SODA_PROCESSED--train--1079940.wav",
25
+ "model_output": "Speaker A: 00:00-00:04, 00:07-00:12, 00:18-00:22, 00:32-00:37, 00:43-00:46\nSpeaker B: 00:04-00:13, 00:14-00:17, 00:23-00:32, 00:38-00:42"
26
+ }
27
+ ]
ms-swift/silence_overlaps/700/test/silence_speaker_segments_test.json ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "key": "SODA_PROCESSED--train--869455",
4
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--869455.wav",
5
+ "model_output": "Speaker A: 00:00-00:11, 00:15-00:25, 00:27-00:38, 00:41-00:53\nSpeaker B: 00:12-00:15, 00:25-00:27, 00:39-00:42"
6
+ },
7
+ {
8
+ "key": "SODA_PROCESSED--train--420178",
9
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--420178.wav",
10
+ "model_output": "Speaker A: 00:00-00:01, 00:04-00:08, 00:10-00:19, 00:31-00:46, 00:50-00:52\nSpeaker B: 00:01-00:03, 00:08-00:09, 00:18-00:26, 00:47-00:49"
11
+ },
12
+ {
13
+ "key": "SODA_PROCESSED--train--836740",
14
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--836740.wav",
15
+ "model_output": "Speaker A: 00:00-00:04, 00:07-00:15, 00:25-00:29, 00:43-00:48\nSpeaker B: 00:04-00:08, 00:20-00:24, 00:29-00:42, 00:48-00:54"
16
+ },
17
+ {
18
+ "key": "SODA_PROCESSED--train--64931",
19
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--64931.wav",
20
+ "model_output": "Speaker A: 00:00-00:02, 00:05-00:08, 00:23-00:28, 00:32-00:39\nSpeaker B: 00:03-00:06, 00:13-00:22, 00:29-00:32"
21
+ },
22
+ {
23
+ "key": "SODA_PROCESSED--train--193891",
24
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--193891.wav",
25
+ "model_output": "Speaker A: 00:00-00:01, 00:04-00:09, 00:14-00:21, 00:38-00:47, 00:56-01:05\nSpeaker B: 00:01-00:03, 00:09-00:15, 00:26-00:38, 00:48-00:56, 01:05-01:10"
26
+ }
27
+ ]
ms-swift/silence_overlaps/700/train/overlap5s_transcriptions_train.json ADDED
The diff for this file is too large to render. See raw diff
 
ms-swift/silence_overlaps/700/train/silence_isoverlaps_train.json ADDED
The diff for this file is too large to render. See raw diff
 
ms-swift/silence_overlaps/silence_speaker_segments.json ADDED
The diff for this file is too large to render. See raw diff
 
ms-swift/silence_overlaps/test/.ipynb_checkpoints/test_train-checkpoint.json ADDED
@@ -0,0 +1,963 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "SODA_PROCESSED--train--449689": {
3
+ "original_dialog_id": "",
4
+ "dialog_index": 449689,
5
+ "processed_dialogue": "A: Hey there. Mind if I lay down next to you? \nB: No, go ahead. \nA: Thanks. I needed a break from the sun. It's so hot today. \nB: Yeah, it is. I'm trying to get a tan, but I don't want to get too dehydrated, so I'm keeping a bottle of water close by and reapplying sunscreen every hour to avoid any skin damage. \nA: Burnt? Yeah, that's definitely a possibility out here. So what brings you to the beach today? Just wanting to relax? \nB: Yeah, pretty much. I just finished up my summer classes and needed some time to myself before starting my new job next week. \nA: That sounds rough. Are you excited for it? Or [interrupt] worried about how you'll balance everything with your personal life and other commitments you might have during this transitional period? \nB: Nervous? A little bit of both, honestly. But mostly excited. It should be a good experience. And the pay is great, so that's a plus. \nA: Definitely. Well, I hope you enjoy the rest of your day here. \nB: Thanks. You too.",
6
+ "clean_dialogue": "A: Hey there. Mind if I lay down next to you? \nB: No, go ahead. \nA: Thanks. I needed a break from the sun. It's so hot today. \nB: Yeah, it is. I'm trying to get a tan, but I don't want to get too dehydrated, so I'm keeping a bottle of water close by and reapplying sunscreen every hour to avoid any skin damage. \nA: Burnt? Yeah, that's definitely a possibility out here. So what brings you to the beach today? Just wanting to relax? \nB: Yeah, pretty much. I just finished up my summer classes and needed some time to myself before starting my new job next week. \nA:That sounds rough. Are you excited for it? Or worried about how you'll balance everything with your personal life and other commitments you might have during this transitional period?\nB: Nervous? A little bit of both, honestly. But mostly excited. It should be a good experience. And the pay is great, so that's a plus. \nA: Definitely. Well, I hope you enjoy the rest of your day here. \nB: Thanks. You too.",
7
+ "speaker_tracks": {
8
+ "A": "/root/autodl-tmp/output_mixedAudios/processed_soda_3_processed_dialogues_part_20/SODA_PROCESSED--train--449689/A_track.wav",
9
+ "B": "/root/autodl-tmp/output_mixedAudios/processed_soda_3_processed_dialogues_part_20/SODA_PROCESSED--train--449689/B_track.wav"
10
+ },
11
+ "error_type": "error_after_interrupt",
12
+ "stereo_audio": "/root/autodl-tmp/output_mixedAudios/processed_soda_3_processed_dialogues_part_20/SODA_PROCESSED--train--449689/stereo_dialogue.wav",
13
+ "total_duration": 50.09668934240363,
14
+ "segments": [
15
+ {
16
+ "speaker": "A",
17
+ "text": "Hey there. Mind if I lay down next to you?",
18
+ "original_text": "Hey there. Mind if I lay down next to you?",
19
+ "start_time": 0,
20
+ "end_time": 2.4961451247165534,
21
+ "audio_file": "/root/autodl-tmp/output_mixedAudios/processed_soda_3_processed_dialogues_part_20/SODA_PROCESSED--train--449689/temp/line_0_A.wav",
22
+ "silence_duration": 0,
23
+ "is_interrupted": false
24
+ },
25
+ {
26
+ "speaker": "B",
27
+ "text": "No, go ahead.",
28
+ "original_text": "No, go ahead.",
29
+ "start_time": 3.0616233505922237,
30
+ "end_time": 4.257451014991316,
31
+ "audio_file": "/root/autodl-tmp/output_mixedAudios/processed_soda_3_processed_dialogues_part_20/SODA_PROCESSED--train--449689/temp/line_1_B.wav",
32
+ "silence_duration": 0.5654782258756702,
33
+ "is_interrupted": false
34
+ },
35
+ {
36
+ "speaker": "A",
37
+ "text": "Thanks. I needed a break from the sun. It's so hot today.",
38
+ "original_text": "Thanks. I needed a break from the sun. It's so hot today.",
39
+ "start_time": 4.673061027457998,
40
+ "end_time": 8.666893227004483,
41
+ "audio_file": "/root/autodl-tmp/output_mixedAudios/processed_soda_3_processed_dialogues_part_20/SODA_PROCESSED--train--449689/temp/line_2_A.wav",
42
+ "silence_duration": 0.41561001246668183,
43
+ "is_interrupted": false
44
+ },
45
+ {
46
+ "speaker": "B",
47
+ "text": "Yeah, it is. I'm trying to get a tan, but I don't want to get too dehydrated, so I'm keeping a bottle of water close by and reapplying sunscreen every hour to avoid any skin damage.",
48
+ "original_text": "Yeah, it is. I'm trying to get a tan, but I don't want to get too dehydrated, so I'm keeping a bottle of water close by and reapplying sunscreen every hour to avoid any skin damage.",
49
+ "start_time": 9.128191918953855,
50
+ "end_time": 19.01989259922596,
51
+ "audio_file": "/root/autodl-tmp/output_mixedAudios/processed_soda_3_processed_dialogues_part_20/SODA_PROCESSED--train--449689/temp/line_3_B.wav",
52
+ "silence_duration": 0.46129869194937123,
53
+ "is_interrupted": false
54
+ },
55
+ {
56
+ "speaker": "A",
57
+ "text": "Burnt? Yeah, that's definitely a possibility out here. So what brings you to the beach today? Just wanting to relax?",
58
+ "original_text": "Burnt? Yeah, that's definitely a possibility out here. So what brings you to the beach today? Just wanting to relax?",
59
+ "start_time": 19.43691572474219,
60
+ "end_time": 27.215600531998426,
61
+ "audio_file": "/root/autodl-tmp/output_mixedAudios/processed_soda_3_processed_dialogues_part_20/SODA_PROCESSED--train--449689/temp/line_4_A.wav",
62
+ "silence_duration": 0.4170231255162265,
63
+ "is_interrupted": false
64
+ },
65
+ {
66
+ "speaker": "B",
67
+ "text": "Yeah, pretty much. I just finished up my summer classes and needed some time to myself before starting my new job next week.",
68
+ "original_text": "Yeah, pretty much. I just finished up my summer classes and needed some time to myself before starting my new job next week.",
69
+ "start_time": 27.73206790619358,
70
+ "end_time": 34.08272550256547,
71
+ "audio_file": "/root/autodl-tmp/output_mixedAudios/processed_soda_3_processed_dialogues_part_20/SODA_PROCESSED--train--449689/temp/line_5_B.wav",
72
+ "silence_duration": 0.5164673741951538,
73
+ "is_interrupted": false
74
+ },
75
+ {
76
+ "speaker": "A",
77
+ "text": "That sounds rough. Are you excited for it? Or",
78
+ "original_text": "That sounds rough. Are you excited for it? Or [interrupt] worried about how you'll balance everything with your personal life and other commitments you might have during this transitional period?",
79
+ "start_time": 34.40566150397062,
80
+ "end_time": 44.703711390591934,
81
+ "audio_file": "/root/autodl-tmp/output_mixedAudios/processed_soda_3_processed_dialogues_part_20/SODA_PROCESSED--train--449689/temp/line_6_A.wav",
82
+ "silence_duration": 0.3229360014051523,
83
+ "is_interrupted": true,
84
+ "text_after_interrupt": "worried about how you'll balance everything with your personal life and other commitments you might have during this transitional period?"
85
+ },
86
+ {
87
+ "speaker": "B",
88
+ "text": "Nervous? A little bit of both, honestly. But mostly excited. It should be a good experience. And the pay is great, so that's a plus.",
89
+ "original_text": "Nervous? A little bit of both, honestly. But mostly excited. It should be a good experience. And the pay is great, so that's a plus.",
90
+ "start_time": 37.1456161524967,
91
+ "end_time": 44.564391662700785,
92
+ "audio_file": "/root/autodl-tmp/output_mixedAudios/processed_soda_3_processed_dialogues_part_20/SODA_PROCESSED--train--449689/temp/line_7_B.wav",
93
+ "silence_duration": 0.36321869535217244,
94
+ "is_interrupted": false
95
+ },
96
+ {
97
+ "speaker": "A",
98
+ "text": "Definitely. Well, I hope you enjoy the rest of your day here.",
99
+ "original_text": "Definitely. Well, I hope you enjoy the rest of your day here.",
100
+ "start_time": 44.9023552612567,
101
+ "end_time": 48.78008768756056,
102
+ "audio_file": "/root/autodl-tmp/output_mixedAudios/processed_soda_3_processed_dialogues_part_20/SODA_PROCESSED--train--449689/temp/line_8_A.wav",
103
+ "silence_duration": 0.33796359855591646,
104
+ "is_interrupted": false
105
+ },
106
+ {
107
+ "speaker": "B",
108
+ "text": "Thanks. You too.",
109
+ "original_text": "Thanks. You too.",
110
+ "start_time": 49.1679089027611,
111
+ "end_time": 50.09670708870214,
112
+ "audio_file": "/root/autodl-tmp/output_mixedAudios/processed_soda_3_processed_dialogues_part_20/SODA_PROCESSED--train--449689/temp/line_9_B.wav",
113
+ "silence_duration": 0.38782121520053575,
114
+ "is_interrupted": false
115
+ }
116
+ ],
117
+ "gt_score": 1
118
+ },
119
+ "SODA_PROCESSED--train--787791": {
120
+ "original_dialog_id": "",
121
+ "dialog_index": 787791,
122
+ "processed_dialogue": "A: You're welcome. I'm just glad I was able to stop it from happening. \nB: Thank you so much for saving my life. I can't even begin to express how [interrupt] grateful I am for what you did. It means the world to me and I'll never forget your kindness and quick thinking in that moment. \nA: Sorry to jump in, but are you sure you're okay? I mean, physically and emotionally? \nB: I think so, but it's all still a bit of a blur. I don't know what would have happened if you hadn't been there. I'm just glad that you were in the right place at the right time. \nA: Yeah, me too. But seriously, if you need anything—someone to talk to or whatever—don't hesitate to reach out, okay? \nB: I really appreciate that. Thanks again, Antwain. \nA: No problem. Take care.",
123
+ "clean_dialogue": "A: You're welcome. I'm just glad I was able to stop it from happening. \nB:Thank you so much for saving my life. I can't even begin to express how grateful I am for what you did. It means the world to me and I'll never forget your kindness and quick thinking in that moment.\nA: Sorry to jump in, but are you sure you're okay? I mean, physically and emotionally? \nB: I think so, but it's all still a bit of a blur. I don't know what would have happened if you hadn't been there. I'm just glad that you were in the right place at the right time. \nA: Yeah, me too. But seriously, if you need anything—someone to talk to or whatever—don't hesitate to reach out, okay? \nB: I really appreciate that. Thanks again, Antwain. \nA: No problem. Take care.",
124
+ "speaker_tracks": {
125
+ "A": "/root/autodl-tmp/output_overlapslong/processed_soda_3_processed_dialogues_part_8/SODA_PROCESSED--train--787791/A_track.wav",
126
+ "B": "/root/autodl-tmp/output_overlapslong/processed_soda_3_processed_dialogues_part_8/SODA_PROCESSED--train--787791/B_track.wav"
127
+ },
128
+ "error_type": "error_after_interrupt",
129
+ "stereo_audio": "/root/autodl-tmp/output_overlapslong/processed_soda_3_processed_dialogues_part_8/SODA_PROCESSED--train--787791/stereo_dialogue.wav",
130
+ "total_duration": 37.52730158730159,
131
+ "segments": [
132
+ {
133
+ "speaker": "A",
134
+ "text": "You're welcome. I'm just glad I was able to stop it from happening.",
135
+ "original_text": "You're welcome. I'm just glad I was able to stop it from happening.",
136
+ "start_time": 0,
137
+ "end_time": 4.249251700680272,
138
+ "audio_file": "/root/autodl-tmp/output_overlapslong/processed_soda_3_processed_dialogues_part_8/SODA_PROCESSED--train--787791/temp/line_0_A.wav",
139
+ "silence_duration": 0,
140
+ "is_interrupted": false
141
+ },
142
+ {
143
+ "speaker": "B",
144
+ "text": "Thank you so much for saving my life. I can't even begin to express how",
145
+ "original_text": "Thank you so much for saving my life. I can't even begin to express how [interrupt] grateful I am for what you did. It means the world to me and I'll never forget your kindness and quick thinking in that moment.",
146
+ "start_time": 4.756366963799184,
147
+ "end_time": 14.694507553368345,
148
+ "audio_file": "/root/autodl-tmp/output_overlapslong/processed_soda_3_processed_dialogues_part_8/SODA_PROCESSED--train--787791/temp/line_1_B.wav",
149
+ "silence_duration": 0.5071152631189118,
150
+ "is_interrupted": true,
151
+ "text_after_interrupt": "grateful I am for what you did. It means the world to me and I'll never forget your kindness and quick thinking in that moment."
152
+ },
153
+ {
154
+ "speaker": "A",
155
+ "text": "Sorry to jump in, but are you sure you're okay? I mean, physically and emotionally?",
156
+ "original_text": "Sorry to jump in, but are you sure you're okay? I mean, physically and emotionally?",
157
+ "start_time": 8.726979208697143,
158
+ "end_time": 14.357818210964716,
159
+ "audio_file": "/root/autodl-tmp/output_overlapslong/processed_soda_3_processed_dialogues_part_8/SODA_PROCESSED--train--787791/temp/line_2_A.wav",
160
+ "silence_duration": 0.4049084459018305,
161
+ "is_interrupted": false
162
+ },
163
+ {
164
+ "speaker": "B",
165
+ "text": "I think so, but it's all still a bit of a blur. I don't know what would have happened if you hadn't been there. I'm just glad that you were in the right place at the right time.",
166
+ "original_text": "I think so, but it's all still a bit of a blur. I don't know what would have happened if you hadn't been there. I'm just glad that you were in the right place at the right time.",
167
+ "start_time": 14.861085984580113,
168
+ "end_time": 23.649838819047233,
169
+ "audio_file": "/root/autodl-tmp/output_overlapslong/processed_soda_3_processed_dialogues_part_8/SODA_PROCESSED--train--787791/temp/line_3_B.wav",
170
+ "silence_duration": 0.5032677736153957,
171
+ "is_interrupted": false
172
+ },
173
+ {
174
+ "speaker": "A",
175
+ "text": "Yeah, me too. But seriously, if you need anything—someone to talk to or whatever—don't hesitate to reach out, okay?",
176
+ "original_text": "Yeah, me too. But seriously, if you need anything—someone to talk to or whatever—don't hesitate to reach out, okay?",
177
+ "start_time": 24.145193415777634,
178
+ "end_time": 32.515987066571284,
179
+ "audio_file": "/root/autodl-tmp/output_overlapslong/processed_soda_3_processed_dialogues_part_8/SODA_PROCESSED--train--787791/temp/line_4_A.wav",
180
+ "silence_duration": 0.4953545967303996,
181
+ "is_interrupted": false
182
+ },
183
+ {
184
+ "speaker": "B",
185
+ "text": "I really appreciate that. Thanks again, Antwain.",
186
+ "original_text": "I really appreciate that. Thanks again, Antwain.",
187
+ "start_time": 32.97180815148517,
188
+ "end_time": 35.68854284536272,
189
+ "audio_file": "/root/autodl-tmp/output_overlapslong/processed_soda_3_processed_dialogues_part_8/SODA_PROCESSED--train--787791/temp/line_5_B.wav",
190
+ "silence_duration": 0.4558210849138826,
191
+ "is_interrupted": false
192
+ },
193
+ {
194
+ "speaker": "A",
195
+ "text": "No problem. Take care.",
196
+ "original_text": "No problem. Take care.",
197
+ "start_time": 35.99481454512998,
198
+ "end_time": 37.5273315519327,
199
+ "audio_file": "/root/autodl-tmp/output_overlapslong/processed_soda_3_processed_dialogues_part_8/SODA_PROCESSED--train--787791/temp/line_6_A.wav",
200
+ "silence_duration": 0.3062716997672569,
201
+ "is_interrupted": false
202
+ }
203
+ ],
204
+ "gt_score": 1
205
+ },
206
+ "SODA_PROCESSED--train--179972": {
207
+ "original_dialog_id": "",
208
+ "dialog_index": 179972,
209
+ "processed_dialogue": "A: So, how did you like the book? \nB: I loved it! The ending was so shocking, I couldn't believe what happened. \nA: Sorry to interrupt, but I just have to ask—did you see that twist with the protagonist coming? I was totally blindsided. \nB: No, I didn't see it coming at all! It was so unexpected. \nA: Yeah, I know. I couldn't put it down. \nB: Me neither. I'm so glad you wanted to read it. \nA: Yeah, I was curious about the protagonist's journey and how it would [interrupt] evolve, especially after that major setback when they had to completely rethink their entire approach to solving the central conflict. \nB: Oh, speaking of the journey, what did you think about that part where the protagonist had to make that impossible choice? It really stuck with me. \nA: It was definitely a rollercoaster ride. There were so many twists and turns. \nB: I know! I didn't see any of them coming. \nA: That's what made it so great. It kept you guessing the whole time. \nB: Definitely. It was a great book. Thanks for lending it to me.",
210
+ "clean_dialogue": "A: So, how did you like the book? \nB: I loved it! The ending was so shocking, I couldn't believe what happened. \nA: Sorry to interrupt, but I just have to ask—did you see that twist with the protagonist coming? I was totally blindsided. \nB: No, I didn't see it coming at all! It was so unexpected. \nA: Yeah, I know. I couldn't put it down. \nB: Me neither. I'm so glad you wanted to read it. \nA:Yeah, I was curious about the protagonist's journey and how it would evolve, especially after that major setback when they had to completely rethink their entire approach to solving the central conflict.\nB: Oh, speaking of the journey, what did you think about that part where the protagonist had to make that impossible choice? It really stuck with me. \nA: It was definitely a rollercoaster ride. There were so many twists and turns. \nB: I know! I didn't see any of them coming. \nA: That's what made it so great. It kept you guessing the whole time. \nB: Definitely. It was a great book. Thanks for lending it to me.",
211
+ "speaker_tracks": {
212
+ "A": "/root/autodl-tmp/output_overlapslong/processed_soda_3_processed_dialogues_part_7/SODA_PROCESSED--train--179972/A_track.wav",
213
+ "B": "/root/autodl-tmp/output_overlapslong/processed_soda_3_processed_dialogues_part_7/SODA_PROCESSED--train--179972/B_track.wav"
214
+ },
215
+ "error_type": "error_after_interrupt",
216
+ "stereo_audio": "/root/autodl-tmp/output_overlapslong/processed_soda_3_processed_dialogues_part_7/SODA_PROCESSED--train--179972/stereo_dialogue.wav",
217
+ "total_duration": 53.57845804988662,
218
+ "segments": [
219
+ {
220
+ "speaker": "A",
221
+ "text": "So, how did you like the book?",
222
+ "original_text": "So, how did you like the book?",
223
+ "start_time": 0,
224
+ "end_time": 1.6950566893424037,
225
+ "audio_file": "/root/autodl-tmp/output_overlapslong/processed_soda_3_processed_dialogues_part_7/SODA_PROCESSED--train--179972/temp/line_0_A.wav",
226
+ "silence_duration": 0,
227
+ "is_interrupted": false
228
+ },
229
+ {
230
+ "speaker": "B",
231
+ "text": "I loved it! The ending was so shocking, I couldn't believe what happened.",
232
+ "original_text": "I loved it! The ending was so shocking, I couldn't believe what happened.",
233
+ "start_time": 2.1792484824735485,
234
+ "end_time": 5.871221271589195,
235
+ "audio_file": "/root/autodl-tmp/output_overlapslong/processed_soda_3_processed_dialogues_part_7/SODA_PROCESSED--train--179972/temp/line_1_B.wav",
236
+ "silence_duration": 0.4841917931311449,
237
+ "is_interrupted": false
238
+ },
239
+ {
240
+ "speaker": "A",
241
+ "text": "Sorry to interrupt, but I just have to ask—did you see that twist with the protagonist coming? I was totally blindsided.",
242
+ "original_text": "Sorry to interrupt, but I just have to ask—did you see that twist with the protagonist coming? I was totally blindsided.",
243
+ "start_time": 6.47038511683308,
244
+ "end_time": 14.504489425223102,
245
+ "audio_file": "/root/autodl-tmp/output_overlapslong/processed_soda_3_processed_dialogues_part_7/SODA_PROCESSED--train--179972/temp/line_2_A.wav",
246
+ "silence_duration": 0.5991638452438857,
247
+ "is_interrupted": false
248
+ },
249
+ {
250
+ "speaker": "B",
251
+ "text": "No, I didn't see it coming at all! It was so unexpected.",
252
+ "original_text": "No, I didn't see it coming at all! It was so unexpected.",
253
+ "start_time": 15.012397119017507,
254
+ "end_time": 18.448950406999366,
255
+ "audio_file": "/root/autodl-tmp/output_overlapslong/processed_soda_3_processed_dialogues_part_7/SODA_PROCESSED--train--179972/temp/line_3_B.wav",
256
+ "silence_duration": 0.507907693794404,
257
+ "is_interrupted": false
258
+ },
259
+ {
260
+ "speaker": "A",
261
+ "text": "Yeah, I know. I couldn't put it down.",
262
+ "original_text": "Yeah, I know. I couldn't put it down.",
263
+ "start_time": 18.875209136594886,
264
+ "end_time": 21.847363331606225,
265
+ "audio_file": "/root/autodl-tmp/output_overlapslong/processed_soda_3_processed_dialogues_part_7/SODA_PROCESSED--train--179972/temp/line_4_A.wav",
266
+ "silence_duration": 0.42625872959552,
267
+ "is_interrupted": false
268
+ },
269
+ {
270
+ "speaker": "B",
271
+ "text": "Me neither. I'm so glad you wanted to read it.",
272
+ "original_text": "Me neither. I'm so glad you wanted to read it.",
273
+ "start_time": 22.440054691555087,
274
+ "end_time": 25.110349476135585,
275
+ "audio_file": "/root/autodl-tmp/output_overlapslong/processed_soda_3_processed_dialogues_part_7/SODA_PROCESSED--train--179972/temp/line_5_B.wav",
276
+ "silence_duration": 0.5926913599488615,
277
+ "is_interrupted": false
278
+ },
279
+ {
280
+ "speaker": "A",
281
+ "text": "Yeah, I was curious about the protagonist's journey and how it would",
282
+ "original_text": "Yeah, I was curious about the protagonist's journey and how it would [interrupt] evolve, especially after that major setback when they had to completely rethink their entire approach to solving the central conflict.",
283
+ "start_time": 25.51803755034393,
284
+ "end_time": 36.89581532812171,
285
+ "audio_file": "/root/autodl-tmp/output_overlapslong/processed_soda_3_processed_dialogues_part_7/SODA_PROCESSED--train--179972/temp/line_6_A.wav",
286
+ "silence_duration": 0.40768807420834613,
287
+ "is_interrupted": true,
288
+ "text_after_interrupt": "evolve, especially after that major setback when they had to completely rethink their entire approach to solving the central conflict."
289
+ },
290
+ {
291
+ "speaker": "B",
292
+ "text": "Oh, speaking of the journey, what did you think about that part where the protagonist had to make that impossible choice? It really stuck with me.",
293
+ "original_text": "Oh, speaking of the journey, what did you think about that part where the protagonist had to make that impossible choice? It really stuck with me.",
294
+ "start_time": 29.790509205672727,
295
+ "end_time": 37.429874285037805,
296
+ "audio_file": "/root/autodl-tmp/output_overlapslong/processed_soda_3_processed_dialogues_part_7/SODA_PROCESSED--train--179972/temp/line_7_B.wav",
297
+ "silence_duration": 0.32835611460902553,
298
+ "is_interrupted": false
299
+ },
300
+ {
301
+ "speaker": "A",
302
+ "text": "It was definitely a rollercoaster ride. There were so many twists and turns.",
303
+ "original_text": "It was definitely a rollercoaster ride. There were so many twists and turns.",
304
+ "start_time": 37.91219711578734,
305
+ "end_time": 42.405258340277136,
306
+ "audio_file": "/root/autodl-tmp/output_overlapslong/processed_soda_3_processed_dialogues_part_7/SODA_PROCESSED--train--179972/temp/line_8_A.wav",
307
+ "silence_duration": 0.4823228307495384,
308
+ "is_interrupted": false
309
+ },
310
+ {
311
+ "speaker": "B",
312
+ "text": "I know! I didn't see any of them coming.",
313
+ "original_text": "I know! I didn't see any of them coming.",
314
+ "start_time": 42.860468420817675,
315
+ "end_time": 45.08958406707618,
316
+ "audio_file": "/root/autodl-tmp/output_overlapslong/processed_soda_3_processed_dialogues_part_7/SODA_PROCESSED--train--179972/temp/line_9_B.wav",
317
+ "silence_duration": 0.4552100805405374,
318
+ "is_interrupted": false
319
+ },
320
+ {
321
+ "speaker": "A",
322
+ "text": "That's what made it so great. It kept you guessing the whole time.",
323
+ "original_text": "That's what made it so great. It kept you guessing the whole time.",
324
+ "start_time": 45.679186523390214,
325
+ "end_time": 49.394379267154385,
326
+ "audio_file": "/root/autodl-tmp/output_overlapslong/processed_soda_3_processed_dialogues_part_7/SODA_PROCESSED--train--179972/temp/line_10_A.wav",
327
+ "silence_duration": 0.5896024563140343,
328
+ "is_interrupted": false
329
+ },
330
+ {
331
+ "speaker": "B",
332
+ "text": "Definitely. It was a great book. Thanks for lending it to me.",
333
+ "original_text": "Definitely. It was a great book. Thanks for lending it to me.",
334
+ "start_time": 49.70074891577286,
335
+ "end_time": 53.57848134207672,
336
+ "audio_file": "/root/autodl-tmp/output_overlapslong/processed_soda_3_processed_dialogues_part_7/SODA_PROCESSED--train--179972/temp/line_11_B.wav",
337
+ "silence_duration": 0.3063696486184793,
338
+ "is_interrupted": false
339
+ }
340
+ ],
341
+ "gt_score": 1
342
+ },
343
+ "SODA_PROCESSED--train--715956": {
344
+ "original_dialog_id": "",
345
+ "dialog_index": 715956,
346
+ "processed_dialogue": "A: Look, I know that I messed up. I was caught with heroin and I'm facing some serious charges. But, I really need your [interrupt] help to find a way out of this because I'm terrified of what might happen if I don't get proper legal representation and support during this difficult time.\nB: Aadya, we've been over this already. The evidence against you is pretty damning. Plus, you have a history of drug use. I don't think there's much that can be done to help you at this point.\nA: But you're my lawyer! You're supposed to help me!\nB: Aadya, I'm doing everything that I can. But, realistically, the chances of you getting out of this are pretty slim. You need to prepare yourself for the possibility of a conviction and think about how you'll handle it.\nA: But I can't go to prison! I'll lose my job, my apartment, everything!\nB: Aadya, you need to calm down. Getting upset isn't going to help anything.\nA: Fine. But, you have to promise me that you'll do everything you can to help me.\nB: I promise.",
347
+ "clean_dialogue": "A:Look, I know that I messed up. I was caught with heroin and I'm facing some serious charges. But, I really need your help to find a way out of this because I'm terrified of what might happen if I don't get proper legal representation and support during this difficult time.\nB: Aadya, we've been over this already. The evidence against you is pretty damning. Plus, you have a history of drug use. I don't think there's much that can be done to help you at this point.\nA: But you're my lawyer! You're supposed to help me!\nB: Aadya, I'm doing everything that I can. But, realistically, the chances of you getting out of this are pretty slim. You need to prepare yourself for the possibility of a conviction and think about how you'll handle it.\nA: But I can't go to prison! I'll lose my job, my apartment, everything!\nB: Aadya, you need to calm down. Getting upset isn't going to help anything.\nA: Fine. But, you have to promise me that you'll do everything you can to help me.\nB: I promise.",
348
+ "speaker_tracks": {
349
+ "A": "/root/autodl-tmp/output_overlapslong/processed_soda_3_processed_dialogues_part_3/SODA_PROCESSED--train--715956/A_track.wav",
350
+ "B": "/root/autodl-tmp/output_overlapslong/processed_soda_3_processed_dialogues_part_3/SODA_PROCESSED--train--715956/B_track.wav"
351
+ },
352
+ "error_type": "error_after_interrupt",
353
+ "stereo_audio": "/root/autodl-tmp/output_overlapslong/processed_soda_3_processed_dialogues_part_3/SODA_PROCESSED--train--715956/stereo_dialogue.wav",
354
+ "total_duration": 49.52126984126984,
355
+ "segments": [
356
+ {
357
+ "speaker": "A",
358
+ "text": "Look, I know that I messed up. I was caught with heroin and I'm facing some serious charges. But, I really need your",
359
+ "original_text": "Look, I know that I messed up. I was caught with heroin and I'm facing some serious charges. But, I really need your [interrupt] help to find a way out of this because I'm terrified of what might happen if I don't get proper legal representation and support during this difficult time.",
360
+ "start_time": 0,
361
+ "end_time": 16.579047619047618,
362
+ "audio_file": "/root/autodl-tmp/output_overlapslong/processed_soda_3_processed_dialogues_part_3/SODA_PROCESSED--train--715956/temp/line_0_A.wav",
363
+ "silence_duration": 0,
364
+ "is_interrupted": true,
365
+ "text_after_interrupt": "help to find a way out of this because I'm terrified of what might happen if I don't get proper legal representation and support during this difficult time."
366
+ },
367
+ {
368
+ "speaker": "B",
369
+ "text": "Aadya, we've been over this already. The evidence against you is pretty damning. Plus, you have a history of drug use. I don't think there's much that can be done to help you at this point.",
370
+ "original_text": "Aadya, we've been over this already. The evidence against you is pretty damning. Plus, you have a history of drug use. I don't think there's much that can be done to help you at this point.",
371
+ "start_time": 8.510113378684807,
372
+ "end_time": 18.36698412698413,
373
+ "audio_file": "/root/autodl-tmp/output_overlapslong/processed_soda_3_processed_dialogues_part_3/SODA_PROCESSED--train--715956/temp/line_1_B.wav",
374
+ "silence_duration": 0.4899749375576017,
375
+ "is_interrupted": false
376
+ },
377
+ {
378
+ "speaker": "A",
379
+ "text": "But you're my lawyer! You're supposed to help me!",
380
+ "original_text": "But you're my lawyer! You're supposed to help me!",
381
+ "start_time": 18.846747434390966,
382
+ "end_time": 21.37772249108031,
383
+ "audio_file": "/root/autodl-tmp/output_overlapslong/processed_soda_3_processed_dialogues_part_3/SODA_PROCESSED--train--715956/temp/line_2_A.wav",
384
+ "silence_duration": 0.4797633074068387,
385
+ "is_interrupted": false
386
+ },
387
+ {
388
+ "speaker": "B",
389
+ "text": "Aadya, I'm doing everything that I can. But, realistically, the chances of you getting out of this are pretty slim. You need to prepare yourself for the possibility of a conviction and think about how you'll handle it.",
390
+ "original_text": "Aadya, I'm doing everything that I can. But, realistically, the chances of you getting out of this are pretty slim. You need to prepare yourself for the possibility of a conviction and think about how you'll handle it.",
391
+ "start_time": 21.881120947184385,
392
+ "end_time": 33.51431822609595,
393
+ "audio_file": "/root/autodl-tmp/output_overlapslong/processed_soda_3_processed_dialogues_part_3/SODA_PROCESSED--train--715956/temp/line_3_B.wav",
394
+ "silence_duration": 0.5033984561040751,
395
+ "is_interrupted": false
396
+ },
397
+ {
398
+ "speaker": "A",
399
+ "text": "But I can't go to prison! I'll lose my job, my apartment, everything!",
400
+ "original_text": "But I can't go to prison! I'll lose my job, my apartment, everything!",
401
+ "start_time": 34.047335561433606,
402
+ "end_time": 38.48234689930209,
403
+ "audio_file": "/root/autodl-tmp/output_overlapslong/processed_soda_3_processed_dialogues_part_3/SODA_PROCESSED--train--715956/temp/line_4_A.wav",
404
+ "silence_duration": 0.5330173353376504,
405
+ "is_interrupted": false
406
+ },
407
+ {
408
+ "speaker": "B",
409
+ "text": "Aadya, you need to calm down. Getting upset isn't going to help anything.",
410
+ "original_text": "Aadya, you need to calm down. Getting upset isn't going to help anything.",
411
+ "start_time": 38.89720479711025,
412
+ "end_time": 43.39026602160004,
413
+ "audio_file": "/root/autodl-tmp/output_overlapslong/processed_soda_3_processed_dialogues_part_3/SODA_PROCESSED--train--715956/temp/line_5_B.wav",
414
+ "silence_duration": 0.4148578978081613,
415
+ "is_interrupted": false
416
+ },
417
+ {
418
+ "speaker": "A",
419
+ "text": "Fine. But, you have to promise me that you'll do everything you can to help me.",
420
+ "original_text": "Fine. But, you have to promise me that you'll do everything you can to help me.",
421
+ "start_time": 43.92319932038778,
422
+ "end_time": 48.27694081698642,
423
+ "audio_file": "/root/autodl-tmp/output_overlapslong/processed_soda_3_processed_dialogues_part_3/SODA_PROCESSED--train--715956/temp/line_6_A.wav",
424
+ "silence_duration": 0.5329332987877419,
425
+ "is_interrupted": false
426
+ },
427
+ {
428
+ "speaker": "B",
429
+ "text": "I promise.",
430
+ "original_text": "I promise.",
431
+ "start_time": 48.62731544236006,
432
+ "end_time": 49.52128369632831,
433
+ "audio_file": "/root/autodl-tmp/output_overlapslong/processed_soda_3_processed_dialogues_part_3/SODA_PROCESSED--train--715956/temp/line_7_B.wav",
434
+ "silence_duration": 0.3503746253736393,
435
+ "is_interrupted": false
436
+ }
437
+ ],
438
+ "gt_score": 1
439
+ },
440
+ "SODA_PROCESSED--train--740576": {
441
+ "original_text": "A: Good morning, Mr. Nguyen! I hope you're doing well today.\nB: I'm doing well, thank you. How are you?\nA: I'm feeling great today! I have a lot of energy and I'm excited to [interrupt] tackle some new projects and challenges that will help us improve our workflow and achieve better results for our clients.\nB: Sorry to interrupt, but I wanted to ask if there's anything specific you're looking forward to today?\nA: I was going to say I'm excited to start my day. Actually, I'm looking forward to a team meeting we have later. I love working here. It's a great environment and the people are really supportive and collaborative, always willing to share their expertise and help each other grow professionally.\nB: I'm glad to hear that! Speaking of the team, do you think we should plan more team-building activities to maintain this positive environment?\nA: That's a great idea! We could definitely benefit from more team-building activities. We're happy to have you on our team.",
442
+ "cleaned_text": "A: Good morning, Mr. Nguyen! I hope you're doing well today.\nB: I'm doing well, thank you. How are you?\nA:I'm feeling great today! I have a lot of energy and I'm excited to tackle some new projects and challenges that will help us improve our workflow and achieve better results for our clients.\nB: Sorry to interrupt, but I wanted to ask if there's anything specific you're looking forward to today?\nA: I was going to say I'm excited to start my day. Actually, I'm looking forward to a team meeting we have later. I love working here. It's a great environment and the people are really supportive and collaborative, always willing to share their expertise and help each other grow professionally.\nB: I'm glad to hear that! Speaking of the team, do you think we should plan more team-building activities to maintain this positive environment?\nA: That's a great idea! We could definitely benefit from more team-building activities. We're happy to have you on our team.",
443
+ "total_duration": 49.437278911564626,
444
+ "stereo_audio": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--740576/stereo_dialogue.wav",
445
+ "speaker_tracks": {
446
+ "A": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--740576/A_track.wav",
447
+ "B": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--740576/B_track.wav"
448
+ },
449
+ "error_type": "error_after_interrupt",
450
+ "segments": [
451
+ {
452
+ "speaker": "A",
453
+ "text": "Good morning, Mr. Nguyen! I hope you're doing well today.",
454
+ "original_text": "Good morning, Mr. Nguyen! I hope you're doing well today.",
455
+ "start_time": 0,
456
+ "end_time": 3.332063492063492,
457
+ "audio_file": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--740576/temp/line_0_A.wav",
458
+ "silence_duration": 0,
459
+ "is_interrupted": false
460
+ },
461
+ {
462
+ "speaker": "B",
463
+ "text": "I'm doing well, thank you. How are you?",
464
+ "original_text": "I'm doing well, thank you. How are you?",
465
+ "start_time": 3.7838731632362803,
466
+ "end_time": 5.583419648497051,
467
+ "audio_file": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--740576/temp/line_1_B.wav",
468
+ "silence_duration": 0.4518096711727882,
469
+ "is_interrupted": false
470
+ },
471
+ {
472
+ "speaker": "A",
473
+ "text": "I'm feeling great today! I have a lot of energy and I'm excited to",
474
+ "original_text": "I'm feeling great today! I have a lot of energy and I'm excited to [interrupt] tackle some new projects and challenges that will help us improve our workflow and achieve better results for our clients.",
475
+ "start_time": 5.88797031081498,
476
+ "end_time": 16.96388867816192,
477
+ "audio_file": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--740576/temp/line_2_A.wav",
478
+ "silence_duration": 0.30455066231792893,
479
+ "is_interrupted": true,
480
+ "text_after_interrupt": "tackle some new projects and challenges that will help us improve our workflow and achieve better results for our clients."
481
+ },
482
+ {
483
+ "speaker": "B",
484
+ "text": "Sorry to interrupt, but I wanted to ask if there's anything specific you're looking forward to today?",
485
+ "original_text": "Sorry to interrupt, but I wanted to ask if there's anything specific you're looking forward to today?",
486
+ "start_time": 10.485521331223143,
487
+ "end_time": 16.104750356166456,
488
+ "audio_file": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--740576/temp/line_3_B.wav",
489
+ "silence_duration": 0.587489668114177,
490
+ "is_interrupted": false
491
+ },
492
+ {
493
+ "speaker": "A",
494
+ "text": "I was going to say I'm excited to start my day. Actually, I'm looking forward to a team meeting we have later. I love working here. It's a great environment and the people are really supportive and collaborative, always willing to share their expertise and help each other grow professionally.",
495
+ "original_text": "I was going to say I'm excited to start my day. Actually, I'm looking forward to a team meeting we have later. I love working here. It's a great environment and the people are really supportive and collaborative, always willing to share their expertise and help each other grow professionally.",
496
+ "start_time": 17.385624216961087,
497
+ "end_time": 33.94145188136018,
498
+ "audio_file": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--740576/temp/line_4_A.wav",
499
+ "silence_duration": 0.4217355387991674,
500
+ "is_interrupted": false
501
+ },
502
+ {
503
+ "speaker": "B",
504
+ "text": "I'm glad to hear that! Speaking of the team, do you think we should plan more team-building activities to maintain this positive environment?",
505
+ "original_text": "I'm glad to hear that! Speaking of the team, do you think we should plan more team-building activities to maintain this positive environment?",
506
+ "start_time": 34.39980783470558,
507
+ "end_time": 41.74892348096408,
508
+ "audio_file": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--740576/temp/line_5_B.wav",
509
+ "silence_duration": 0.4583559533453947,
510
+ "is_interrupted": false
511
+ },
512
+ {
513
+ "speaker": "A",
514
+ "text": "That's a great idea! We could definitely benefit from more team-building activities. We're happy to have you on our team.",
515
+ "original_text": "That's a great idea! We could definitely benefit from more team-building activities. We're happy to have you on our team.",
516
+ "start_time": 42.285572803275116,
517
+ "end_time": 49.437318835021145,
518
+ "audio_file": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--740576/temp/line_6_A.wav",
519
+ "silence_duration": 0.5366493223110326,
520
+ "is_interrupted": false
521
+ }
522
+ ]
523
+ },
524
+ "SODA_PROCESSED--train--836018": {
525
+ "original_text": "A: Hey Ceanna, I saw that you were doing the reports for the group project. Do you want me to help you with [interrupt] organizing the sections or proofreading? I've got some experience with formatting academic papers and making sure all the citations are properly aligned.\nB: Actually, I could use some help with the data analysis part. It's a bit overwhelming.\nA: Sure, I can take care of that. So what do you think of the project so far?\nB: It's interesting. I'm learning a lot about different cultures and how they influence people's daily lives, from their eating habits to their social interactions and even their work-life balance perspectives.\nA: Speaking of cultures, did you notice how the traditions vary even within the same country? It's amazing how diverse it can be.\nB: Yeah, definitely. It's fascinating.",
526
+ "cleaned_text": "A:Hey Ceanna, I saw that you were doing the reports for the group project. Do you want me to help you with organizing the sections or proofreading? I've got some experience with formatting academic papers and making sure all the citations are properly aligned.\nB: Actually, I could use some help with the data analysis part. It's a bit overwhelming.\nA: Sure, I can take care of that. So what do you think of the project so far?\nB: It's interesting. I'm learning a lot about different cultures and how they influence people's daily lives, from their eating habits to their social interactions and even their work-life balance perspectives.\nA: Speaking of cultures, did you notice how the traditions vary even within the same country? It's amazing how diverse it can be.\nB: Yeah, definitely. It's fascinating.",
527
+ "total_duration": 42.34984126984127,
528
+ "stereo_audio": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--836018/stereo_dialogue.wav",
529
+ "speaker_tracks": {
530
+ "A": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--836018/A_track.wav",
531
+ "B": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--836018/B_track.wav"
532
+ },
533
+ "error_type": "error_after_interrupt",
534
+ "segments": [
535
+ {
536
+ "speaker": "A",
537
+ "text": "Hey Ceanna, I saw that you were doing the reports for the group project. Do you want me to help you with",
538
+ "original_text": "Hey Ceanna, I saw that you were doing the reports for the group project. Do you want me to help you with [interrupt] organizing the sections or proofreading? I've got some experience with formatting academic papers and making sure all the citations are properly aligned.",
539
+ "start_time": 0,
540
+ "end_time": 15.011700680272108,
541
+ "audio_file": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--836018/temp/line_0_A.wav",
542
+ "silence_duration": 0,
543
+ "is_interrupted": true,
544
+ "text_after_interrupt": "organizing the sections or proofreading? I've got some experience with formatting academic papers and making sure all the citations are properly aligned."
545
+ },
546
+ {
547
+ "speaker": "B",
548
+ "text": "Actually, I could use some help with the data analysis part. It's a bit overwhelming.",
549
+ "original_text": "Actually, I could use some help with the data analysis part. It's a bit overwhelming.",
550
+ "start_time": 6.176507936507937,
551
+ "end_time": 11.250068027210885,
552
+ "audio_file": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--836018/temp/line_1_B.wav",
553
+ "silence_duration": 0.5190912573415952,
554
+ "is_interrupted": false
555
+ },
556
+ {
557
+ "speaker": "A",
558
+ "text": "Sure, I can take care of that. So what do you think of the project so far?",
559
+ "original_text": "Sure, I can take care of that. So what do you think of the project so far?",
560
+ "start_time": 15.60657282124108,
561
+ "end_time": 19.937094363191193,
562
+ "audio_file": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--836018/temp/line_2_A.wav",
563
+ "silence_duration": 0.5948721409689715,
564
+ "is_interrupted": false
565
+ },
566
+ {
567
+ "speaker": "B",
568
+ "text": "It's interesting. I'm learning a lot about different cultures and how they influence people's daily lives, from their eating habits to their social interactions and even their work-life balance perspectives.",
569
+ "original_text": "It's interesting. I'm learning a lot about different cultures and how they influence people's daily lives, from their eating habits to their social interactions and even their work-life balance perspectives.",
570
+ "start_time": 20.306213172030862,
571
+ "end_time": 30.476553308085286,
572
+ "audio_file": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--836018/temp/line_3_B.wav",
573
+ "silence_duration": 0.36911880883966963,
574
+ "is_interrupted": false
575
+ },
576
+ {
577
+ "speaker": "A",
578
+ "text": "Speaking of cultures, did you notice how the traditions vary even within the same country? It's amazing how diverse it can be.",
579
+ "original_text": "Speaking of cultures, did you notice how the traditions vary even within the same country? It's amazing how diverse it can be.",
580
+ "start_time": 30.848617682402736,
581
+ "end_time": 39.10331155995375,
582
+ "audio_file": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--836018/temp/line_4_A.wav",
583
+ "silence_duration": 0.3720643743174508,
584
+ "is_interrupted": false
585
+ },
586
+ {
587
+ "speaker": "B",
588
+ "text": "Yeah, definitely. It's fascinating.",
589
+ "original_text": "Yeah, definitely. It's fascinating.",
590
+ "start_time": 39.435776463870354,
591
+ "end_time": 42.34988077226038,
592
+ "audio_file": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--836018/temp/line_5_B.wav",
593
+ "silence_duration": 0.3324649039166007,
594
+ "is_interrupted": false
595
+ }
596
+ ]
597
+ },
598
+ "SODA_PROCESSED--train--771154": {
599
+ "original_text": "A: Hey, Mom. Can I go to Tim's house today? \nB: No, you can't go to Tim's house today. \nA: Why [interrupt] can't I go? I finished all my chores and even helped with the dishes after dinner last night, so I really think I deserve this. \nB: Because I said so. And before you ask again, you need to focus on your homework. \nA: But I don't want to do my homework. \nB: Well, you have to do it anyway. Now go and get started immediately because your teacher specifically mentioned you need to improve your math skills this semester. \nA: Wait, can't I just finish it later? I promise I'll get it done before bed. \nB: No, you need to do it now. Once it's done, then we can talk about other plans.",
600
+ "cleaned_text": "A: Hey, Mom. Can I go to Tim's house today? \nB: No, you can't go to Tim's house today. \nA:Why can't I go? I finished all my chores and even helped with the dishes after dinner last night, so I really think I deserve this.\nB: Because I said so. And before you ask again, you need to focus on your homework. \nA: But I don't want to do my homework. \nB: Well, you have to do it anyway. Now go and get started immediately because your teacher specifically mentioned you need to improve your math skills this semester. \nA: Wait, can't I just finish it later? I promise I'll get it done before bed. \nB: No, you need to do it now. Once it's done, then we can talk about other plans.",
601
+ "total_duration": 35.76784580498866,
602
+ "stereo_audio": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--771154/stereo_dialogue.wav",
603
+ "speaker_tracks": {
604
+ "A": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--771154/A_track.wav",
605
+ "B": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--771154/B_track.wav"
606
+ },
607
+ "error_type": "error_after_interrupt",
608
+ "segments": [
609
+ {
610
+ "speaker": "A",
611
+ "text": "Hey, Mom. Can I go to Tim's house today?",
612
+ "original_text": "Hey, Mom. Can I go to Tim's house today?",
613
+ "start_time": 0,
614
+ "end_time": 3.5294331065759637,
615
+ "audio_file": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--771154/temp/line_0_A.wav",
616
+ "silence_duration": 0,
617
+ "is_interrupted": false
618
+ },
619
+ {
620
+ "speaker": "B",
621
+ "text": "No, you can't go to Tim's house today.",
622
+ "original_text": "No, you can't go to Tim's house today.",
623
+ "start_time": 3.9899851353219105,
624
+ "end_time": 6.126220962986309,
625
+ "audio_file": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--771154/temp/line_1_B.wav",
626
+ "silence_duration": 0.4605520287459467,
627
+ "is_interrupted": false
628
+ },
629
+ {
630
+ "speaker": "A",
631
+ "text": "Why",
632
+ "original_text": "Why [interrupt] can't I go? I finished all my chores and even helped with the dishes after dinner last night, so I really think I deserve this.",
633
+ "start_time": 6.4787876256667465,
634
+ "end_time": 14.652211661947927,
635
+ "audio_file": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--771154/temp/line_2_A.wav",
636
+ "silence_duration": 0.3525666626804373,
637
+ "is_interrupted": true,
638
+ "text_after_interrupt": "can't I go? I finished all my chores and even helped with the dishes after dinner last night, so I really think I deserve this."
639
+ },
640
+ {
641
+ "speaker": "B",
642
+ "text": "Because I said so. And before you ask again, you need to focus on your homework.",
643
+ "original_text": "Because I said so. And before you ask again, you need to focus on your homework.",
644
+ "start_time": 7.210216197095318,
645
+ "end_time": 11.889037058773322,
646
+ "audio_file": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--771154/temp/line_3_B.wav",
647
+ "silence_duration": 0.4183677243140269,
648
+ "is_interrupted": false
649
+ },
650
+ {
651
+ "speaker": "A",
652
+ "text": "But I don't want to do my homework.",
653
+ "original_text": "But I don't want to do my homework.",
654
+ "start_time": 15.159162983353092,
655
+ "end_time": 17.074809241856492,
656
+ "audio_file": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--771154/temp/line_4_A.wav",
657
+ "silence_duration": 0.5069513214051653,
658
+ "is_interrupted": false
659
+ },
660
+ {
661
+ "speaker": "B",
662
+ "text": "Well, you have to do it anyway. Now go and get started immediately because your teacher specifically mentioned you need to improve your math skills this semester.",
663
+ "original_text": "Well, you have to do it anyway. Now go and get started immediately because your teacher specifically mentioned you need to improve your math skills this semester.",
664
+ "start_time": 17.6716136549098,
665
+ "end_time": 25.763767849921138,
666
+ "audio_file": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--771154/temp/line_5_B.wav",
667
+ "silence_duration": 0.5968044130533094,
668
+ "is_interrupted": false
669
+ },
670
+ {
671
+ "speaker": "A",
672
+ "text": "Wait, can't I just finish it later? I promise I'll get it done before bed.",
673
+ "original_text": "Wait, can't I just finish it later? I promise I'll get it done before bed.",
674
+ "start_time": 26.149694131743242,
675
+ "end_time": 31.02588460793372,
676
+ "audio_file": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--771154/temp/line_6_A.wav",
677
+ "silence_duration": 0.38592628182210614,
678
+ "is_interrupted": false
679
+ },
680
+ {
681
+ "speaker": "B",
682
+ "text": "No, you need to do it now. Once it's done, then we can talk about other plans.",
683
+ "original_text": "No, you need to do it now. Once it's done, then we can talk about other plans.",
684
+ "start_time": 31.518621255026567,
685
+ "end_time": 35.767872955706835,
686
+ "audio_file": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--771154/temp/line_7_B.wav",
687
+ "silence_duration": 0.49273664709284837,
688
+ "is_interrupted": false
689
+ }
690
+ ]
691
+ },
692
+ "SODA_PROCESSED--train--1794": {
693
+ "original_text": "A: Hey, Mom. \nB: Hey, Moriah. What's up? \nA: Not much. Just hanging out in my room. \nB: That's good. I'm glad you're keeping busy. \nA: Yeah, I'm just trying to stay out of [interrupt] everyone's way and focus on my own things because I've been feeling a bit overwhelmed with school and social stuff lately. \nB: Trouble? Is everything okay? I mean, you know you can always talk to me if something's bothering you. \nA: I'm just kind of going through some stuff right now. \nB: Well, your father and I were just talking about how we need to have a serious talk with you about some things that have been going on around the house and how you've been feeling lately because we've noticed some changes in your behavior and we're genuinely concerned about your well-being. \nA: Wait, is this about me staying out late last weekend? \nB: Not just that, but it's part of it. We've also noticed you've been acting a bit differently lately, and we're just wondering if everything is okay with you. \nA: I don't know, Mom. Like I said, I'm just dealing with some stuff. \nB: Okay. Well, if you ever want to talk about anything, we're here for you. We love you, Moriah. \nA: I love you too, Mom.",
694
+ "cleaned_text": "A: Hey, Mom. \nB: Hey, Moriah. What's up? \nA: Not much. Just hanging out in my room. \nB: That's good. I'm glad you're keeping busy. \nA:Yeah, I'm just trying to stay out of everyone's way and focus on my own things because I've been feeling a bit overwhelmed with school and social stuff lately.\nB: Trouble? Is everything okay? I mean, you know you can always talk to me if something's bothering you. \nA: I'm just kind of going through some stuff right now. \nB: Well, your father and I were just talking about how we need to have a serious talk with you about some things that have been going on around the house and how you've been feeling lately because we've noticed some changes in your behavior and we're genuinely concerned about your well-being. \nA: Wait, is this about me staying out late last weekend? \nB: Not just that, but it's part of it. We've also noticed you've been acting a bit differently lately, and we're just wondering if everything is okay with you. \nA: I don't know, Mom. Like I said, I'm just dealing with some stuff. \nB: Okay. Well, if you ever want to talk about anything, we're here for you. We love you, Moriah. \nA: I love you too, Mom.",
695
+ "total_duration": 57.99024943310658,
696
+ "stereo_audio": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--1794/stereo_dialogue.wav",
697
+ "speaker_tracks": {
698
+ "A": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--1794/A_track.wav",
699
+ "B": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--1794/B_track.wav"
700
+ },
701
+ "error_type": "error_after_interrupt",
702
+ "segments": [
703
+ {
704
+ "speaker": "A",
705
+ "text": "Hey, Mom.",
706
+ "original_text": "Hey, Mom.",
707
+ "start_time": 0,
708
+ "end_time": 0.8591383219954648,
709
+ "audio_file": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--1794/temp/line_0_A.wav",
710
+ "silence_duration": 0,
711
+ "is_interrupted": false
712
+ },
713
+ {
714
+ "speaker": "B",
715
+ "text": "Hey, Moriah. What's up?",
716
+ "original_text": "Hey, Moriah. What's up?",
717
+ "start_time": 1.2689805234753475,
718
+ "end_time": 2.7782775756295424,
719
+ "audio_file": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--1794/temp/line_1_B.wav",
720
+ "silence_duration": 0.4098422014798827,
721
+ "is_interrupted": false
722
+ },
723
+ {
724
+ "speaker": "A",
725
+ "text": "Not much. Just hanging out in my room.",
726
+ "original_text": "Not much. Just hanging out in my room.",
727
+ "start_time": 3.2528527196865094,
728
+ "end_time": 5.505188320593539,
729
+ "audio_file": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--1794/temp/line_2_A.wav",
730
+ "silence_duration": 0.47457514405696677,
731
+ "is_interrupted": false
732
+ },
733
+ {
734
+ "speaker": "B",
735
+ "text": "That's good. I'm glad you're keeping busy.",
736
+ "original_text": "That's good. I'm glad you're keeping busy.",
737
+ "start_time": 6.047417085120735,
738
+ "end_time": 8.520342255188762,
739
+ "audio_file": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--1794/temp/line_3_B.wav",
740
+ "silence_duration": 0.5422287645271964,
741
+ "is_interrupted": false
742
+ },
743
+ {
744
+ "speaker": "A",
745
+ "text": "Yeah, I'm just trying to stay out of",
746
+ "original_text": "Yeah, I'm just trying to stay out of [interrupt] everyone's way and focus on my own things because I've been feeling a bit overwhelmed with school and social stuff lately.",
747
+ "start_time": 8.88750351109664,
748
+ "end_time": 18.059385597264438,
749
+ "audio_file": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--1794/temp/line_4_A.wav",
750
+ "silence_duration": 0.3671612559078772,
751
+ "is_interrupted": true,
752
+ "text_after_interrupt": "everyone's way and focus on my own things because I've been feeling a bit overwhelmed with school and social stuff lately."
753
+ },
754
+ {
755
+ "speaker": "B",
756
+ "text": "Trouble? Is everything okay? I mean, you know you can always talk to me if something's bothering you.",
757
+ "original_text": "Trouble? Is everything okay? I mean, you know you can always talk to me if something's bothering you.",
758
+ "start_time": 11.697118023568294,
759
+ "end_time": 18.2915851437497,
760
+ "audio_file": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--1794/temp/line_5_B.wav",
761
+ "silence_duration": 0.32519714638310315,
762
+ "is_interrupted": false
763
+ },
764
+ {
765
+ "speaker": "A",
766
+ "text": "I'm just kind of going through some stuff right now.",
767
+ "original_text": "I'm just kind of going through some stuff right now.",
768
+ "start_time": 18.62204195980515,
769
+ "end_time": 21.396826540304016,
770
+ "audio_file": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--1794/temp/line_6_A.wav",
771
+ "silence_duration": 0.3304568160554501,
772
+ "is_interrupted": false
773
+ },
774
+ {
775
+ "speaker": "B",
776
+ "text": "Well, your father and I were just talking about how we need to have a serious talk with you about some things that have been going on around the house and how you've been feeling lately because we've noticed some changes in your behavior and we're genuinely concerned about your well-being.",
777
+ "original_text": "Well, your father and I were just talking about how we need to have a serious talk with you about some things that have been going on around the house and how you've been feeling lately because we've noticed some changes in your behavior and we're genuinely concerned about your well-being.",
778
+ "start_time": 21.697523952118004,
779
+ "end_time": 34.7355284872654,
780
+ "audio_file": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--1794/temp/line_7_B.wav",
781
+ "silence_duration": 0.30069741181398774,
782
+ "is_interrupted": false
783
+ },
784
+ {
785
+ "speaker": "A",
786
+ "text": "Wait, is this about me staying out late last weekend?",
787
+ "original_text": "Wait, is this about me staying out late last weekend?",
788
+ "start_time": 35.29912687220732,
789
+ "end_time": 38.677630273567864,
790
+ "audio_file": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--1794/temp/line_8_A.wav",
791
+ "silence_duration": 0.5635983849419206,
792
+ "is_interrupted": false
793
+ },
794
+ {
795
+ "speaker": "B",
796
+ "text": "Not just that, but it's part of it. We've also noticed you've been acting a bit differently lately, and we're just wondering if everything is okay with you.",
797
+ "original_text": "Not just that, but it's part of it. We've also noticed you've been acting a bit differently lately, and we're just wondering if everything is okay with you.",
798
+ "start_time": 39.09678068392148,
799
+ "end_time": 45.99310721453372,
800
+ "audio_file": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--1794/temp/line_9_B.wav",
801
+ "silence_duration": 0.4191504103536184,
802
+ "is_interrupted": false
803
+ },
804
+ {
805
+ "speaker": "A",
806
+ "text": "I don't know, Mom. Like I said, I'm just dealing with some stuff.",
807
+ "original_text": "I don't know, Mom. Like I said, I'm just dealing with some stuff.",
808
+ "start_time": 46.3670775788443,
809
+ "end_time": 50.46539957430915,
810
+ "audio_file": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--1794/temp/line_10_A.wav",
811
+ "silence_duration": 0.3739703643105766,
812
+ "is_interrupted": false
813
+ },
814
+ {
815
+ "speaker": "B",
816
+ "text": "Okay. Well, if you ever want to talk about anything, we're here for you. We love you, Moriah.",
817
+ "original_text": "Okay. Well, if you ever want to talk about anything, we're here for you. We love you, Moriah.",
818
+ "start_time": 50.99388055366539,
819
+ "end_time": 56.06744064436834,
820
+ "audio_file": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--1794/temp/line_11_B.wav",
821
+ "silence_duration": 0.5284809793562373,
822
+ "is_interrupted": false
823
+ },
824
+ {
825
+ "speaker": "A",
826
+ "text": "I love you too, Mom.",
827
+ "original_text": "I love you too, Mom.",
828
+ "start_time": 56.55062063706958,
829
+ "end_time": 57.99025782527819,
830
+ "audio_file": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--1794/temp/line_12_A.wav",
831
+ "silence_duration": 0.4831799927012399,
832
+ "is_interrupted": false
833
+ }
834
+ ]
835
+ },
836
+ "SODA_PROCESSED--train--1070688": {
837
+ "original_text": "A: Hi Karis, I'm so excited to have you over for dinner tonight. I've been planning the menu and setting the table all day. I hope you're [interrupt] ready for a cozy evening with some delicious food and great conversation about your recent travels through Europe that you mentioned last time we met.\nB: Oh, I just remembered—I have a slight allergy to shellfish. I know you usually avoid it, but I wanted to mention it just in case.\nA: No worries, there's no shellfish on the menu tonight. Well, let's get started then! For our first course, we'll be having a spinach and feta salad. The feta is from a local farm and the spinach is from my garden. For our main course, I've made chicken Parmesan with homemade tomato sauce and fresh mozzarella cheese. And for dessert, we'll be having tiramisu that I made from scratch this afternoon. I wanted it to be just right for tonight.\nB: Tiramisu? That's my favorite dessert! I'm so excited to try it. You really know how to make a meal special.\nA: I'm glad you're excited! I was about to say I made it this morning using a special family recipe that's been passed down through generations, so it's extra fresh and has that authentic Italian flavor you can't find in restaurants. I hope you enjoy everything!",
838
+ "cleaned_text": "A:Hi Karis, I'm so excited to have you over for dinner tonight. I've been planning the menu and setting the table all day. I hope you're ready for a cozy evening with some delicious food and great conversation about your recent travels through Europe that you mentioned last time we met.\nB: Oh, I just remembered—I have a slight allergy to shellfish. I know you usually avoid it, but I wanted to mention it just in case.\nA: No worries, there's no shellfish on the menu tonight. Well, let's get started then! For our first course, we'll be having a spinach and feta salad. The feta is from a local farm and the spinach is from my garden. For our main course, I've made chicken Parmesan with homemade tomato sauce and fresh mozzarella cheese. And for dessert, we'll be having tiramisu that I made from scratch this afternoon. I wanted it to be just right for tonight.\nB: Tiramisu? That's my favorite dessert! I'm so excited to try it. You really know how to make a meal special.\nA: I'm glad you're excited! I was about to say I made it this morning using a special family recipe that's been passed down through generations, so it's extra fresh and has that authentic Italian flavor you can't find in restaurants. I hope you enjoy everything!",
839
+ "total_duration": 66.58453514739229,
840
+ "stereo_audio": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--1070688/stereo_dialogue.wav",
841
+ "speaker_tracks": {
842
+ "A": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--1070688/A_track.wav",
843
+ "B": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--1070688/B_track.wav"
844
+ },
845
+ "error_type": "error_after_interrupt",
846
+ "segments": [
847
+ {
848
+ "speaker": "A",
849
+ "text": "Hi Karis, I'm so excited to have you over for dinner tonight. I've been planning the menu and setting the table all day. I hope you're",
850
+ "original_text": "Hi Karis, I'm so excited to have you over for dinner tonight. I've been planning the menu and setting the table all day. I hope you're [interrupt] ready for a cozy evening with some delicious food and great conversation about your recent travels through Europe that you mentioned last time we met.",
851
+ "start_time": 0,
852
+ "end_time": 16.172698412698413,
853
+ "audio_file": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--1070688/temp/line_0_A.wav",
854
+ "silence_duration": 0,
855
+ "is_interrupted": true,
856
+ "text_after_interrupt": "ready for a cozy evening with some delicious food and great conversation about your recent travels through Europe that you mentioned last time we met."
857
+ },
858
+ {
859
+ "speaker": "B",
860
+ "text": "Oh, I just remembered—I have a slight allergy to shellfish. I know you usually avoid it, but I wanted to mention it just in case.",
861
+ "original_text": "Oh, I just remembered—I have a slight allergy to shellfish. I know you usually avoid it, but I wanted to mention it just in case.",
862
+ "start_time": 8.719092970521542,
863
+ "end_time": 15.650249433106577,
864
+ "audio_file": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--1070688/temp/line_1_B.wav",
865
+ "silence_duration": 0.42791712549357114,
866
+ "is_interrupted": false
867
+ },
+ {
868
+ "speaker": "A",
869
+ "text": "No worries, there's no shellfish on the menu tonight. Well, let's get started then! For our first course, we'll be having a spinach and feta salad. The feta is from a local farm and the spinach is from my garden. For our main course, I've made chicken Parmesan with homemade tomato sauce and fresh mozzarella cheese. And for dessert, we'll be having tiramisu that I made from scratch this afternoon. I wanted it to be just right for tonight.",
870
+ "original_text": "No worries, there's no shellfish on the menu tonight. Well, let's get started then! For our first course, we'll be having a spinach and feta salad. The feta is from a local farm and the spinach is from my garden. For our main course, I've made chicken Parmesan with homemade tomato sauce and fresh mozzarella cheese. And for dessert, we'll be having tiramisu that I made from scratch this afternoon. I wanted it to be just right for tonight.",
871
+ "start_time": 16.66087863834312,
872
+ "end_time": 43.38704643879663,
873
+ "audio_file": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--1070688/temp/line_2_A.wav",
874
+ "silence_duration": 0.488180225644707,
875
+ "is_interrupted": false
876
+ },
877
+ {
878
+ "speaker": "B",
879
+ "text": "Tiramisu? That's my favorite dessert! I'm so excited to try it. You really know how to make a meal special.",
880
+ "original_text": "Tiramisu? That's my favorite dessert! I'm so excited to try it. You really know how to make a meal special.",
881
+ "start_time": 43.75020989775093,
882
+ "end_time": 49.926717834258866,
883
+ "audio_file": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--1070688/temp/line_3_B.wav",
884
+ "silence_duration": 0.36316345895429397,
885
+ "is_interrupted": false
886
+ },
887
+ {
888
+ "speaker": "A",
889
+ "text": "I'm glad you're excited! I was about to say I made it this morning using a special family recipe that's been passed down through generations, so it's extra fresh and has that authentic Italian flavor you can't find in restaurants. I hope you enjoy everything!",
890
+ "original_text": "I'm glad you're excited! I was about to say I made it this morning using a special family recipe that's been passed down through generations, so it's extra fresh and has that authentic Italian flavor you can't find in restaurants. I hope you enjoy everything!",
891
+ "start_time": 50.49314394878711,
892
+ "end_time": 66.58457252021569,
893
+ "audio_file": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--1070688/temp/line_4_A.wav",
894
+ "silence_duration": 0.5664261145282402,
895
+ "is_interrupted": false
896
+ }
897
+ ]
898
+ },
899
+ "SODA_PROCESSED--train--737676": {
900
+ "original_text": "A: Hey, Miraya. I'm sorry about what happened with the car last night. I was really angry and I didn't mean to take it out on your [interrupt] car like that, especially since it's your most valuable possession and you've always taken such good care of it. I know it was wrong, and I regret it deeply.\nB: I understand, Stephon. But what exactly made you so angry? Was it something specific about what happened earlier in the week?\nA: Yeah, it did. I was really mad at you for a while after that. But I know it wasn't your fault and I shouldn't have taken it out on your car like that.\nB: Well, since you're being honest and apologetic about it, I don't think there's anything else you need to do other than maybe just be more mindful in the future about how you express your emotions, especially when you're upset, because lashing out at objects or people never really solves the underlying issue and often makes things worse.\nA: Absolutely, I'll work on that. And I really appreciate you being so understanding about this. Thanks for giving me the chance to talk it out.",
901
+ "cleaned_text": "A:Hey, Miraya. I'm sorry about what happened with the car last night. I was really angry and I didn't mean to take it out on your car like that, especially since it's your most valuable possession and you've always taken such good care of it. I know it was wrong, and I regret it deeply.\nB: I understand, Stephon. But what exactly made you so angry? Was it something specific about what happened earlier in the week?\nA: Yeah, it did. I was really mad at you for a while after that. But I know it wasn't your fault and I shouldn't have taken it out on your car like that.\nB: Well, since you're being honest and apologetic about it, I don't think there's anything else you need to do other than maybe just be more mindful in the future about how you express your emotions, especially when you're upset, because lashing out at objects or people never really solves the underlying issue and often makes things worse.\nA: Absolutely, I'll work on that. And I really appreciate you being so understanding about this. Thanks for giving me the chance to talk it out.",
902
+ "total_duration": 52.89809523809524,
903
+ "stereo_audio": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--737676/stereo_dialogue.wav",
904
+ "speaker_tracks": {
905
+ "A": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--737676/A_track.wav",
906
+ "B": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--737676/B_track.wav"
907
+ },
908
+ "error_type": "error_after_interrupt",
909
+ "segments": [
910
+ {
911
+ "speaker": "A",
912
+ "text": "Hey, Miraya. I'm sorry about what happened with the car last night. I was really angry and I didn't mean to take it out on your",
913
+ "original_text": "Hey, Miraya. I'm sorry about what happened with the car last night. I was really angry and I didn't mean to take it out on your [interrupt] car like that, especially since it's your most valuable possession and you've always taken such good care of it. I know it was wrong, and I regret it deeply.",
914
+ "start_time": 0,
915
+ "end_time": 16.938956916099773,
916
+ "audio_file": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--737676/temp/line_0_A.wav",
917
+ "silence_duration": 0,
918
+ "is_interrupted": true,
919
+ "text_after_interrupt": "car like that, especially since it's your most valuable possession and you've always taken such good care of it. I know it was wrong, and I regret it deeply."
920
+ },
921
+ {
922
+ "speaker": "B",
923
+ "text": "I understand, Stephon. But what exactly made you so angry? Was it something specific about what happened earlier in the week?",
924
+ "original_text": "I understand, Stephon. But what exactly made you so angry? Was it something specific about what happened earlier in the week?",
925
+ "start_time": 8.753922902494331,
926
+ "end_time": 15.348390022675737,
927
+ "audio_file": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--737676/temp/line_1_B.wav",
928
+ "silence_duration": 0.5553895116856843,
929
+ "is_interrupted": false
930
+ },
931
+ {
932
+ "speaker": "A",
933
+ "text": "Yeah, it did. I was really mad at you for a while after that. But I know it wasn't your fault and I shouldn't have taken it out on your car like that.",
934
+ "original_text": "Yeah, it did. I was really mad at you for a while after that. But I know it wasn't your fault and I shouldn't have taken it out on your car like that.",
935
+ "start_time": 17.329799609194744,
936
+ "end_time": 26.582951536632386,
937
+ "audio_file": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--737676/temp/line_2_A.wav",
938
+ "silence_duration": 0.3908426930949695,
939
+ "is_interrupted": false
940
+ },
941
+ {
942
+ "speaker": "B",
943
+ "text": "Well, since you're being honest and apologetic about it, I don't think there's anything else you need to do other than maybe just be more mindful in the future about how you express your emotions, especially when you're upset, because lashing out at objects or people never really solves the underlying issue and often makes things worse.",
944
+ "original_text": "Well, since you're being honest and apologetic about it, I don't think there's anything else you need to do other than maybe just be more mindful in the future about how you express your emotions, especially when you're upset, because lashing out at objects or people never really solves the underlying issue and often makes things worse.",
945
+ "start_time": 26.900238001740547,
946
+ "end_time": 44.05978448700132,
947
+ "audio_file": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--737676/temp/line_3_B.wav",
948
+ "silence_duration": 0.3172864651081615,
949
+ "is_interrupted": false
950
+ },
951
+ {
952
+ "speaker": "A",
953
+ "text": "Absolutely, I'll work on that. And I really appreciate you being so understanding about this. Thanks for giving me the chance to talk it out.",
954
+ "original_text": "Absolutely, I'll work on that. And I really appreciate you being so understanding about this. Thanks for giving me the chance to talk it out.",
955
+ "start_time": 44.64342590433178,
956
+ "end_time": 52.8981197818828,
957
+ "audio_file": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--737676/temp/line_4_A.wav",
958
+ "silence_duration": 0.5836414173304574,
959
+ "is_interrupted": false
960
+ }
961
+ ]
962
+ }
963
+ }
ms-swift/swift/cli/__pycache__/main.cpython-310.pyc ADDED
Binary file (2.31 kB). View file
 
ms-swift/swift/cli/_megatron/__init__.py ADDED
File without changes
ms-swift/swift/cli/deploy.py ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
# Copyright (c) Alibaba, Inc. and its affiliates.
# CLI entry point for model deployment: delegates directly to `deploy_main`
# exported by the swift.llm package.
from swift.llm import deploy_main

if __name__ == '__main__':
    deploy_main()
ms-swift/swift/cli/sample.py ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
# Copyright (c) Alibaba, Inc. and its affiliates.
# CLI entry point for sampling: delegates directly to `sampling_main`
# from the swift.llm.sampling submodule.
from swift.llm.sampling import sampling_main

if __name__ == '__main__':
    sampling_main()
ms-swift/swift/cli/sft.py ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
# Copyright (c) Alibaba, Inc. and its affiliates.
# CLI entry point for supervised fine-tuning: delegates directly to
# `sft_main` exported by the swift.llm package.
# NOTE: the previous version imported `os` without using it; the dead
# import has been removed (this 7-line file was fully visible, so nothing
# else could have depended on it).
from swift.llm import sft_main

if __name__ == '__main__':
    sft_main()
ms-swift/swift/cli/web_ui.py ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
# Copyright (c) Alibaba, Inc. and its affiliates.
# CLI entry point for the web UI: delegates directly to `webui_main`
# exported by the swift.ui package.
from swift.ui import webui_main

if __name__ == '__main__':
    webui_main()
ms-swift/swift/hub/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
# Re-export the hub interface from the sibling `.hub` module: the `HFHub`
# and `MSHub` classes and the `get_hub` factory that selects between them.
from .hub import HFHub, MSHub, get_hub
ms-swift/swift/llm/__init__.py ADDED
@@ -0,0 +1,85 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Copyright (c) Alibaba, Inc. and its affiliates.
from typing import TYPE_CHECKING

from swift.utils.import_utils import _LazyModule

# Lazy-import package bootstrap: at static-analysis time (TYPE_CHECKING) the
# real symbols are imported so IDEs and type checkers can resolve them; at
# runtime this module object is replaced in sys.modules by a _LazyModule that
# imports each submodule only on first attribute access.  The TYPE_CHECKING
# branch and `_import_structure` below must be kept in sync by hand.
if TYPE_CHECKING:
    # Recommend using `xxx_main`
    from .infer import (VllmEngine, RequestConfig, LmdeployEngine, PtEngine, InferEngine, infer_main, deploy_main,
                        InferClient, run_deploy, AdapterRequest, prepare_model_template, BaseInferEngine, rollout_main)
    from .export import (export_main, merge_lora, quantize_model, export_to_ollama)
    from .eval import eval_main
    from .app import app_main
    from .train import sft_main, pt_main, rlhf_main, get_multimodal_target_regex
    from .sampling import sampling_main
    from .argument import (EvalArguments, InferArguments, TrainArguments, ExportArguments, DeployArguments,
                           RLHFArguments, WebUIArguments, BaseArguments, AppArguments, SamplingArguments)
    from .template import (TEMPLATE_MAPPING, Template, Word, get_template, TemplateType, register_template,
                           TemplateInputs, TemplateMeta, get_template_meta, InferRequest, load_image, MaxLengthError,
                           load_file, draw_bbox)
    from .model import (register_model, MODEL_MAPPING, ModelType, get_model_tokenizer, safe_snapshot_download,
                        HfConfigFactory, ModelInfo, ModelMeta, ModelKeys, register_model_arch, MultiModelKeys,
                        ModelArch, get_model_arch, MODEL_ARCH_MAPPING, get_model_info_meta, get_model_name, ModelGroup,
                        Model, get_model_tokenizer_with_flash_attn, get_model_tokenizer_multimodal, load_by_unsloth,
                        git_clone_github, get_matched_model_meta)
    from .dataset import (AlpacaPreprocessor, ResponsePreprocessor, MessagesPreprocessor, AutoPreprocessor,
                          DATASET_MAPPING, MediaResource, register_dataset, register_dataset_info, EncodePreprocessor,
                          LazyLLMDataset, load_dataset, DATASET_TYPE, sample_dataset, RowPreprocessor, DatasetMeta,
                          HfDataset, SubsetDataset)
    from .utils import (deep_getattr, to_float_dtype, to_device, History, Messages, history_to_messages,
                        messages_to_history, Processor, save_checkpoint, ProcessorMixin,
                        get_temporary_cache_files_directory, get_cache_dir, is_moe_model)
    from .base import SwiftPipeline
    from .data_loader import DataLoaderDispatcher, DataLoaderShard, BatchSamplerShard
else:
    # Maps submodule name -> list of public names it provides; consumed by
    # _LazyModule to resolve attribute access to the right deferred import.
    _import_structure = {
        # NOTE(review): 'rlhf_main' appears both here under 'rlhf' and under
        # 'train' below, while the TYPE_CHECKING branch imports it only from
        # .train — confirm whether the 'rlhf' entry is still needed.
        'rlhf': ['rlhf_main'],
        'infer': [
            'deploy_main', 'VllmEngine', 'RequestConfig', 'LmdeployEngine', 'PtEngine', 'infer_main', 'InferClient',
            'run_deploy', 'InferEngine', 'AdapterRequest', 'prepare_model_template', 'BaseInferEngine', 'rollout_main'
        ],
        'export': ['export_main', 'merge_lora', 'quantize_model', 'export_to_ollama'],
        'app': ['app_main'],
        'eval': ['eval_main'],
        'train': ['sft_main', 'pt_main', 'rlhf_main', 'get_multimodal_target_regex'],
        'sampling': ['sampling_main'],
        'argument': [
            'EvalArguments', 'InferArguments', 'TrainArguments', 'ExportArguments', 'WebUIArguments', 'DeployArguments',
            'RLHFArguments', 'BaseArguments', 'AppArguments', 'SamplingArguments'
        ],
        'template': [
            'TEMPLATE_MAPPING', 'Template', 'Word', 'get_template', 'TemplateType', 'register_template',
            'TemplateInputs', 'TemplateMeta', 'get_template_meta', 'InferRequest', 'load_image', 'MaxLengthError',
            'load_file', 'draw_bbox'
        ],
        'model': [
            'MODEL_MAPPING', 'ModelType', 'get_model_tokenizer', 'safe_snapshot_download', 'HfConfigFactory',
            'ModelInfo', 'ModelMeta', 'ModelKeys', 'register_model_arch', 'MultiModelKeys', 'ModelArch',
            'MODEL_ARCH_MAPPING', 'get_model_arch', 'get_model_info_meta', 'get_model_name', 'register_model',
            'ModelGroup', 'Model', 'get_model_tokenizer_with_flash_attn', 'get_model_tokenizer_multimodal',
            'load_by_unsloth', 'git_clone_github', 'get_matched_model_meta'
        ],
        'dataset': [
            'AlpacaPreprocessor', 'MessagesPreprocessor', 'AutoPreprocessor', 'DATASET_MAPPING', 'MediaResource',
            'register_dataset', 'register_dataset_info', 'EncodePreprocessor', 'LazyLLMDataset', 'load_dataset',
            'DATASET_TYPE', 'sample_dataset', 'RowPreprocessor', 'ResponsePreprocessor', 'DatasetMeta', 'HfDataset',
            'SubsetDataset'
        ],
        'utils': [
            'deep_getattr', 'to_device', 'to_float_dtype', 'History', 'Messages', 'history_to_messages',
            'messages_to_history', 'Processor', 'save_checkpoint', 'ProcessorMixin',
            'get_temporary_cache_files_directory', 'get_cache_dir', 'is_moe_model'
        ],
        'base': ['SwiftPipeline'],
        'data_loader': ['DataLoaderDispatcher', 'DataLoaderShard', 'BatchSamplerShard'],
    }

    import sys

    # Replace this module object with the lazy proxy; attribute access on
    # `swift.llm` from here on triggers the deferred submodule imports.
    sys.modules[__name__] = _LazyModule(
        __name__,
        globals()['__file__'],
        _import_structure,
        module_spec=__spec__,
        extra_objects={},
    )
ms-swift/swift/llm/__pycache__/base.cpython-310.pyc ADDED
Binary file (2.35 kB). View file
 
ms-swift/swift/llm/app/build_ui.py ADDED
@@ -0,0 +1,139 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) Alibaba, Inc. and its affiliates.
2
+ from functools import partial
3
+ from typing import Literal, Optional
4
+
5
+ import gradio as gr
6
+
7
+ from swift.utils import get_file_mm_type
8
+ from ..utils import History
9
+ from .locale import locale_mapping
10
+
11
+
12
+ def clear_session():
13
+ return '', [], []
14
+
15
+
16
+ def modify_system_session(system: str):
17
+ system = system or ''
18
+ return system, '', [], []
19
+
20
+
21
+ def _history_to_messages(history: History, system: Optional[str]):
22
+ messages = []
23
+ if system is not None:
24
+ messages.append({'role': 'system', 'content': system})
25
+ content = []
26
+ for h in history:
27
+ assert isinstance(h, (list, tuple))
28
+ if isinstance(h[0], tuple):
29
+ assert h[1] is None
30
+ file_path = h[0][0]
31
+ try:
32
+ mm_type = get_file_mm_type(file_path)
33
+ content.append({'type': mm_type, mm_type: file_path})
34
+ except ValueError:
35
+ with open(file_path, 'r', encoding='utf-8') as f:
36
+ content.append({'type': 'text', 'text': f.read()})
37
+ else:
38
+ content.append({'type': 'text', 'text': h[0]})
39
+ messages.append({'role': 'user', 'content': content})
40
+ if h[1] is not None:
41
+ messages.append({'role': 'assistant', 'content': h[1]})
42
+ content = []
43
+ return messages
44
+
45
+
46
+ def _parse_text(text: str) -> str:
47
+ mapping = {'<': '&lt;', '>': '&gt;', '*': '&ast;'}
48
+ for k, v in mapping.items():
49
+ text = text.replace(k, v)
50
+ return text
51
+
52
+
53
+ async def model_chat(history: History, real_history: History, system: Optional[str], *, client, model: str,
54
+ request_config: Optional['RequestConfig']):
55
+ if history:
56
+ from swift.llm import InferRequest
57
+
58
+ messages = _history_to_messages(real_history, system)
59
+ resp_or_gen = await client.infer_async(
60
+ InferRequest(messages=messages), request_config=request_config, model=model)
61
+ if request_config and request_config.stream:
62
+ response = ''
63
+ async for resp in resp_or_gen:
64
+ if resp is None:
65
+ continue
66
+ response += resp.choices[0].delta.content
67
+ history[-1][1] = _parse_text(response)
68
+ real_history[-1][-1] = response
69
+ yield history, real_history
70
+
71
+ else:
72
+ response = resp_or_gen.choices[0].message.content
73
+ history[-1][1] = _parse_text(response)
74
+ real_history[-1][-1] = response
75
+ yield history, real_history
76
+
77
+ else:
78
+ yield [], []
79
+
80
+
81
+ def add_text(history: History, real_history: History, query: str):
82
+ history = history or []
83
+ real_history = real_history or []
84
+ history.append([_parse_text(query), None])
85
+ real_history.append([query, None])
86
+ return history, real_history, ''
87
+
88
+
89
+ def add_file(history: History, real_history: History, file):
90
+ history = history or []
91
+ real_history = real_history or []
92
+ history.append([(file.name, ), None])
93
+ real_history.append([(file.name, ), None])
94
+ return history, real_history
95
+
96
+
97
+ def build_ui(base_url: str,
98
+ model: Optional[str] = None,
99
+ *,
100
+ request_config: Optional['RequestConfig'] = None,
101
+ is_multimodal: bool = True,
102
+ studio_title: Optional[str] = None,
103
+ lang: Literal['en', 'zh'] = 'en',
104
+ default_system: Optional[str] = None):
105
+ from swift.llm import InferClient
106
+ client = InferClient(base_url=base_url)
107
+ model = model or client.models[0]
108
+ studio_title = studio_title or model
109
+ with gr.Blocks() as demo:
110
+ gr.Markdown(f'<center><font size=8>{studio_title}</center>')
111
+ with gr.Row():
112
+ with gr.Column(scale=3):
113
+ system_input = gr.Textbox(value=default_system, lines=1, label='System')
114
+ with gr.Column(scale=1):
115
+ modify_system = gr.Button(locale_mapping['modify_system'][lang], scale=2)
116
+ chatbot = gr.Chatbot(label='Chatbot')
117
+ textbox = gr.Textbox(lines=1, label='Input')
118
+
119
+ with gr.Row():
120
+ upload = gr.UploadButton(locale_mapping['upload'][lang], visible=is_multimodal)
121
+ submit = gr.Button(locale_mapping['submit'][lang])
122
+ regenerate = gr.Button(locale_mapping['regenerate'][lang])
123
+ clear_history = gr.Button(locale_mapping['clear_history'][lang])
124
+
125
+ system_state = gr.State(value=default_system)
126
+ history_state = gr.State(value=[])
127
+ model_chat_ = partial(model_chat, client=client, model=model, request_config=request_config)
128
+
129
+ upload.upload(add_file, [chatbot, history_state, upload], [chatbot, history_state])
130
+ textbox.submit(add_text, [chatbot, history_state, textbox],
131
+ [chatbot, history_state, textbox]).then(model_chat_, [chatbot, history_state, system_state],
132
+ [chatbot, history_state])
133
+ submit.click(add_text, [chatbot, history_state, textbox],
134
+ [chatbot, history_state, textbox]).then(model_chat_, [chatbot, history_state, system_state],
135
+ [chatbot, history_state])
136
+ regenerate.click(model_chat_, [chatbot, history_state, system_state], [chatbot, history_state])
137
+ clear_history.click(clear_session, [], [textbox, chatbot, history_state])
138
+ modify_system.click(modify_system_session, [system_input], [system_state, textbox, chatbot, history_state])
139
+ return demo
ms-swift/swift/llm/argument/__pycache__/app_args.cpython-310.pyc ADDED
Binary file (1.55 kB). View file
 
ms-swift/swift/llm/argument/__pycache__/eval_args.cpython-310.pyc ADDED
Binary file (4.96 kB). View file
 
ms-swift/swift/llm/argument/__pycache__/export_args.cpython-310.pyc ADDED
Binary file (3.78 kB). View file
 
ms-swift/swift/llm/argument/__pycache__/rlhf_args.cpython-310.pyc ADDED
Binary file (11 kB). View file
 
ms-swift/swift/llm/argument/__pycache__/sampling_args.cpython-310.pyc ADDED
Binary file (2.79 kB). View file
 
ms-swift/swift/llm/argument/__pycache__/train_args.cpython-310.pyc ADDED
Binary file (8.79 kB). View file
 
ms-swift/swift/llm/argument/__pycache__/tuner_args.cpython-310.pyc ADDED
Binary file (10.6 kB). View file
 
ms-swift/swift/llm/argument/__pycache__/webui_args.cpython-310.pyc ADDED
Binary file (957 Bytes). View file
 
ms-swift/swift/llm/argument/base_args/__pycache__/generation_args.cpython-310.pyc ADDED
Binary file (2.25 kB). View file
 
ms-swift/swift/llm/argument/base_args/__pycache__/quant_args.cpython-310.pyc ADDED
Binary file (3.18 kB). View file
 
ms-swift/swift/llm/argument/base_args/base_args.py ADDED
@@ -0,0 +1,268 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) Alibaba, Inc. and its affiliates.
2
+ import os
3
+ from dataclasses import dataclass, field, fields
4
+ from typing import Any, Dict, List, Literal, Optional, Union
5
+
6
+ import json
7
+
8
+ from swift.hub import get_hub
9
+ from swift.llm import Processor, Template, get_model_tokenizer, get_template, load_by_unsloth, safe_snapshot_download
10
+ from swift.llm.utils import get_ckpt_dir
11
+ from swift.plugin import extra_tuners
12
+ from swift.utils import (check_json_format, get_dist_setting, get_logger, import_external_file, is_dist, is_master,
13
+ set_device, use_hf_hub)
14
+ from .data_args import DataArguments
15
+ from .generation_args import GenerationArguments
16
+ from .model_args import ModelArguments
17
+ from .quant_args import QuantizeArguments
18
+ from .template_args import TemplateArguments
19
+
20
+ logger = get_logger()
21
+
22
+
23
+ def get_supported_tuners():
24
+ return {'lora', 'full', 'longlora', 'adalora', 'llamapro', 'adapter', 'vera', 'boft', 'fourierft', 'reft', 'bone'
25
+ } | set(extra_tuners.keys())
26
+
27
+
28
+ @dataclass
29
+ class CompatArguments:
30
+ ckpt_dir: Optional[str] = None
31
+ lora_modules: List[str] = field(default_factory=list)
32
+
33
+ def _handle_ckpt_dir(self: 'BaseArguments'):
34
+ assert os.path.isdir(self.ckpt_dir), f'self.ckpt_dir: {self.ckpt_dir}'
35
+ if (os.path.exists(os.path.join(self.ckpt_dir, 'adapter_config.json'))
36
+ or os.path.exists(os.path.join(self.ckpt_dir, 'default', 'adapter_config.json'))
37
+ or os.path.exists(os.path.join(self.ckpt_dir, 'reft'))):
38
+ if self.ckpt_dir in self.adapters:
39
+ return
40
+ self.adapters.insert(0, self.ckpt_dir)
41
+ else:
42
+ self.model = self.ckpt_dir
43
+ self.ckpt_dir = None
44
+
45
+ def __post_init__(self: 'BaseArguments'):
46
+ if self.ckpt_dir is not None:
47
+ self._handle_ckpt_dir()
48
+
49
+ if len(self.lora_modules) > 0:
50
+ self.adapters += self.lora_modules
51
+
52
+
53
+ @dataclass
54
+ class BaseArguments(CompatArguments, GenerationArguments, QuantizeArguments, DataArguments, TemplateArguments,
55
+ ModelArguments):
56
+ """
57
+ BaseArguments class is a dataclass that inherits from multiple argument classes:
58
+ GenerationArguments, QuantizeArguments, DataArguments, TemplateArguments, ModelArguments.
59
+
60
+ Args:
61
+ tuner_backend(str): Support peft or unsloth.
62
+ train_type(str): The training type, support all supported tuners and `full`.
63
+ seed (int): Random seed for reproducibility. Default is 42.
64
+ model_kwargs (Optional[str]): Additional keyword arguments for the model. Default is None.
65
+ load_data_args (bool): Flag to determine if dataset configuration should be loaded. Default is False.
66
+ use_hf (bool): Flag to determine if Hugging Face should be used. Default is False.
67
+ hub_token (Optional[str]): SDK token for authentication. Default is None.
68
+ custom_register_path (List[str]): Path to custom .py file for dataset registration. Default is None.
69
+ ignore_args_error (bool): Flag to ignore argument errors for notebook compatibility. Default is False.
70
+ use_swift_lora (bool): Use swift lora, a compatible argument
71
+ """
72
+ tuner_backend: Literal['peft', 'unsloth'] = 'peft'
73
+ train_type: str = field(default='lora', metadata={'help': f'train_type choices: {list(get_supported_tuners())}'})
74
+ adapters: List[str] = field(default_factory=list)
75
+ external_plugins: List[str] = field(default_factory=list)
76
+
77
+ seed: int = 42
78
+ model_kwargs: Optional[Union[dict, str]] = None
79
+ load_args: bool = True
80
+ load_data_args: bool = False
81
+
82
+ use_hf: bool = False
83
+ # None: use env var `MODELSCOPE_API_TOKEN`
84
+ hub_token: Optional[str] = field(
85
+ default=None, metadata={'help': 'SDK token can be found in https://modelscope.cn/my/myaccesstoken'})
86
+ custom_register_path: List[str] = field(default_factory=list) # .py
87
+
88
+ # extra
89
+ ignore_args_error: bool = False # True: notebook compatibility
90
+ use_swift_lora: bool = False # True for using tuner_backend == swift, don't specify this unless you know what you are doing # noqa
91
+
92
+ def _prepare_training_args(self, training_args: Dict[str, Any]) -> None:
93
+ pass
94
+
95
+ def _init_custom_register(self) -> None:
96
+ """Register custom .py file to datasets"""
97
+ if isinstance(self.custom_register_path, str):
98
+ self.custom_register_path = [self.custom_register_path]
99
+ if not self.custom_register_path:
100
+ return
101
+ for path in self.custom_register_path:
102
+ import_external_file(path)
103
+ logger.info(f'Successfully registered {self.custom_register_path}.')
104
+
105
+ def _import_external_plugins(self):
106
+ if isinstance(self.external_plugins, str):
107
+ self.external_plugins = [self.external_plugins]
108
+ if not self.external_plugins:
109
+ return
110
+ for external_plugin in self.external_plugins:
111
+ import_external_file(external_plugin)
112
+ logger.info(f'Successfully imported external_plugins: {self.external_plugins}.')
113
+
114
+ @staticmethod
115
+ def _check_is_adapter(adapter_dir: str) -> bool:
116
+ if (os.path.exists(os.path.join(adapter_dir, 'adapter_config.json'))
117
+ or os.path.exists(os.path.join(adapter_dir, 'default', 'adapter_config.json'))
118
+ or os.path.exists(os.path.join(adapter_dir, 'reft'))):
119
+ return True
120
+ return False
121
+
122
+ def _init_adapters(self):
123
+ if isinstance(self.adapters, str):
124
+ self.adapters = [self.adapters]
125
+ self.adapters = [
126
+ safe_snapshot_download(adapter, use_hf=self.use_hf, hub_token=self.hub_token) for adapter in self.adapters
127
+ ]
128
+
129
+ def __post_init__(self):
130
+ if self.use_hf or use_hf_hub():
131
+ self.use_hf = True
132
+ os.environ['USE_HF'] = '1'
133
+ CompatArguments.__post_init__(self)
134
+ self._init_adapters()
135
+ self._init_ckpt_dir()
136
+ self._init_custom_register()
137
+ self._import_external_plugins()
138
+ self._init_model_kwargs()
139
+ # The Seq2SeqTrainingArguments has a property called world_size, which cannot be assigned a value.
140
+ self.rank, self.local_rank, self.global_world_size, self.local_world_size = get_dist_setting()
141
+ logger.info(f'rank: {self.rank}, local_rank: {self.local_rank}, '
142
+ f'world_size: {self.global_world_size}, local_world_size: {self.local_world_size}')
143
+ if self.train_type not in extra_tuners:
144
+ for adapter in self.adapters:
145
+ assert self._check_is_adapter(adapter), (
146
+ f'`{adapter}` is not an adapter, please try using `--model` to pass it.')
147
+ ModelArguments.__post_init__(self)
148
+ QuantizeArguments.__post_init__(self)
149
+ TemplateArguments.__post_init__(self)
150
+ DataArguments.__post_init__(self)
151
+
152
+ self.hub = get_hub(self.use_hf)
153
+ if self.hub.try_login(self.hub_token):
154
+ logger.info('hub login successful!')
155
+
156
+ def _init_model_kwargs(self):
157
+ """Prepare model kwargs and set them to the env"""
158
+ self.model_kwargs: Dict[str, Any] = self.parse_to_dict(self.model_kwargs)
159
+ for k, v in self.model_kwargs.items():
160
+ k = k.upper()
161
+ os.environ[k] = str(v)
162
+
163
+ @property
164
+ def is_adapter(self) -> bool:
165
+ return self.train_type not in {'full'}
166
+
167
+ @property
168
+ def supported_tuners(self):
169
+ return get_supported_tuners()
170
+
171
+ @property
172
+ def adapters_can_be_merged(self):
173
+ return {'lora', 'longlora', 'llamapro', 'adalora'}
174
+
175
+ @classmethod
176
+ def from_pretrained(cls, checkpoint_dir: str):
177
+ self = super().__new__(cls)
178
+ self.load_data_args = True
179
+ self.ckpt_dir = checkpoint_dir
180
+ self.load_args_from_ckpt()
181
+ all_keys = list(f.name for f in fields(BaseArguments))
182
+ for key in all_keys:
183
+ if not hasattr(self, key):
184
+ setattr(self, key, None)
185
+ return self
186
+
187
+ def _init_ckpt_dir(self, adapters=None):
188
+ # compat megatron
189
+ model = self.model or getattr(self, 'mcore_model', None) or getattr(self, 'load', None)
190
+ self.ckpt_dir = get_ckpt_dir(model, adapters or self.adapters)
191
+ if self.ckpt_dir and self.load_args:
192
+ self.load_args_from_ckpt()
193
+
194
+ def load_args_from_ckpt(self) -> None:
195
+ from ..train_args import TrainArguments
196
+ args_path = os.path.join(self.ckpt_dir, 'args.json')
197
+ assert os.path.exists(args_path), f'args_path: {args_path}'
198
+ with open(args_path, 'r', encoding='utf-8') as f:
199
+ old_args = json.load(f)
200
+ all_keys = list(f.name for f in fields(BaseArguments))
201
+ data_keys = list(f.name for f in fields(DataArguments))
202
+ load_keys = [
203
+ # quant_args
204
+ 'bnb_4bit_quant_type',
205
+ 'bnb_4bit_use_double_quant',
206
+ # base_args
207
+ 'train_type',
208
+ 'tuner_backend',
209
+ 'use_swift_lora',
210
+ # data_args
211
+ 'model_name',
212
+ 'model_author',
213
+ 'split_dataset_ratio',
214
+ # template_args
215
+ 'use_chat_template',
216
+ ]
217
+ skip_keys = list(f.name for f in fields(GenerationArguments) + fields(CompatArguments)) + ['adapters']
218
+ if not isinstance(self, TrainArguments):
219
+ skip_keys += ['max_length']
220
+ all_keys = set(all_keys) - set(skip_keys)
221
+ for key, old_value in old_args.items():
222
+ if key not in all_keys or old_value is None:
223
+ continue
224
+ if not self.load_data_args and key in data_keys:
225
+ continue
226
+ value = getattr(self, key, None)
227
+ if value is None or isinstance(value, (list, tuple)) and len(value) == 0 or key in load_keys:
228
+ setattr(self, key, old_value)
229
+ logger.info(f'Successfully loaded {args_path}.')
230
+
231
+ def save_args(self, output_dir=None) -> None:
232
+ if is_master():
233
+ output_dir = output_dir or self.output_dir
234
+ os.makedirs(output_dir, exist_ok=True)
235
+ fpath = os.path.join(output_dir, 'args.json')
236
+ logger.info(f'The {self.__class__.__name__} will be saved in: {fpath}')
237
+ with open(fpath, 'w', encoding='utf-8') as f:
238
+ json.dump(check_json_format(self.__dict__), f, ensure_ascii=False, indent=2)
239
+
240
+ def _init_device(self):
241
+ if is_dist():
242
+ set_device()
243
+
244
+ def get_template(self, processor: 'Processor', template_type: Optional[str] = None) -> 'Template':
245
+ template_kwargs = self.get_template_kwargs()
246
+ template_type = template_type or self.template
247
+ template = get_template(template_type, processor, **template_kwargs)
248
+ return template
249
+
250
+ def get_model_processor(self,
251
+ *,
252
+ model=None,
253
+ model_type=None,
254
+ model_revision=None,
255
+ task_type=None,
256
+ num_labels=None,
257
+ **kwargs):
258
+ if self.tuner_backend == 'unsloth':
259
+ return load_by_unsloth(self)
260
+ kwargs.update(self.get_model_kwargs())
261
+ # compat rlhf
262
+ kwargs['model_id_or_path'] = model or self.model
263
+ kwargs['model_type'] = model_type or self.model_type
264
+ kwargs['model_revision'] = model_revision or self.model_revision
265
+ kwargs['task_type'] = task_type or self.task_type
266
+ kwargs['num_labels'] = num_labels or self.num_labels
267
+
268
+ return get_model_tokenizer(**kwargs)
ms-swift/swift/llm/argument/base_args/model_args.py ADDED
@@ -0,0 +1,178 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) Alibaba, Inc. and its affiliates.
2
+ import ast
3
+ import math
4
+ import os
5
+ from dataclasses import dataclass, field
6
+ from typing import Any, Dict, Literal, Optional, Union
7
+
8
+ import json
9
+ import torch
10
+ from transformers.utils import is_torch_mps_available
11
+
12
+ from swift.llm import MODEL_MAPPING, HfConfigFactory, get_model_info_meta, get_model_name
13
+ from swift.utils import get_dist_setting, get_logger
14
+
15
+ logger = get_logger()
16
+
17
+
18
+ @dataclass
19
+ class ModelArguments:
20
+ """
21
+ ModelArguments class is a dataclass that holds various arguments related to model configuration and usage.
22
+
23
+ Args:
24
+ model (Optional[str]): model_id or model_path. Default is None.
25
+ model_type (Optional[str]): Type of the model group. Default is None.
26
+ model_revision (Optional[str]): Revision of the model. Default is None.
27
+ torch_dtype (Literal): Model parameter dtype. Default is None.
28
+ attn_impl (Literal): Attention implementation to use. Default is None.
29
+ num_labels (Optional[int]): Number of labels for classification tasks. Default is None.
30
+ rope_scaling (Literal): Type of rope scaling to use. Default is None.
31
+ device_map (Optional[str]): Configuration for device mapping. Default is None.
32
+ local_repo_path (Optional[str]): Path to the local github repository for model. Default is None.
33
+ init_strategy (Literal): Strategy to initialize all uninitialized parameters. Default is None.
34
+ """
35
+ model: Optional[str] = None # model id or model path
36
+ model_type: Optional[str] = field(
37
+ default=None, metadata={'help': f'model_type choices: {list(MODEL_MAPPING.keys())}'})
38
+ model_revision: Optional[str] = None
39
+ task_type: Literal['causal_lm', 'seq_cls', 'embedding'] = None
40
+
41
+ torch_dtype: Literal['bfloat16', 'float16', 'float32', None] = None
42
+ # flash_attn: It will automatically convert names based on the model.
43
+ # None: It will be automatically selected between sdpa and eager.
44
+ attn_impl: Literal['flash_attn', 'sdpa', 'eager', 'flex_attention', None] = None
45
+
46
+ num_labels: Optional[int] = None
47
+ problem_type: Literal['regression', 'single_label_classification', 'multi_label_classification'] = None
48
+ rope_scaling: Literal['linear', 'dynamic'] = None
49
+ device_map: Optional[Union[dict, str]] = None
50
+ max_memory: Optional[Union[dict, str]] = None
51
+ # When some model code needs to be downloaded from GitHub,
52
+ # this parameter specifies the path to the locally downloaded repository.
53
+ local_repo_path: Optional[str] = None
54
+ init_strategy: Literal['zero', 'uniform', 'normal', 'xavier_uniform', 'xavier_normal', 'kaiming_uniform',
55
+ 'kaiming_normal', 'orthogonal'] = None
56
+
57
+ @staticmethod
58
+ def parse_to_dict(value: Union[str, Dict, None], strict: bool = True) -> Union[str, Dict]:
59
+ """Convert a JSON string or JSON file into a dict"""
60
+ # If the value could potentially be a string, it is generally advisable to set strict to False.
61
+ if value is None:
62
+ value = {}
63
+ elif isinstance(value, str):
64
+ if os.path.exists(value): # local path
65
+ with open(value, 'r', encoding='utf-8') as f:
66
+ value = json.load(f)
67
+ else: # json str
68
+ try:
69
+ value = json.loads(value)
70
+ except json.JSONDecodeError:
71
+ if strict:
72
+ logger.error(f"Unable to parse string: '{value}'")
73
+ raise
74
+ return value
75
+
76
+ def _init_device_map(self):
77
+ """Prepare device map args"""
78
+ if self.device_map:
79
+ self.device_map: Union[str, Dict[str, Any], None] = self.parse_to_dict(self.device_map, strict=False)
80
+ # compat mp&ddp
81
+ _, local_rank, _, local_world_size = get_dist_setting()
82
+ if local_world_size > 1 and isinstance(self.device_map, dict) and local_rank > 0:
83
+ for k, v in self.device_map.items():
84
+ if isinstance(v, int):
85
+ self.device_map[k] += local_rank
86
+
87
+ def _init_max_memory(self):
88
+ if isinstance(self.max_memory, str):
89
+ try:
90
+ self.max_memory = ast.literal_eval(self.max_memory)
91
+ except Exception:
92
+ pass
93
+ self.max_memory = self.parse_to_dict(self.max_memory)
94
+ # compat mp&ddp
95
+ _, local_rank, _, local_world_size = get_dist_setting()
96
+ if local_world_size > 1 and isinstance(self.max_memory, dict) and local_rank > 0:
97
+ for k in list(self.max_memory.keys()):
98
+ if isinstance(k, int):
99
+ self.max_memory[k + local_rank] = self.max_memory.pop(k)
100
+
101
+ def _init_torch_dtype(self) -> None:
102
+ """"If torch_dtype is None, find a proper dtype by the train_type/GPU"""
103
+ from swift.llm import TrainArguments
104
+
105
+ self.torch_dtype: Optional[torch.dtype] = HfConfigFactory.to_torch_dtype(self.torch_dtype)
106
+ self.torch_dtype: torch.dtype = self._init_model_info()
107
+ # Mixed Precision Training
108
+ if isinstance(self, TrainArguments):
109
+ self._init_mixed_precision()
110
+
111
+ def _init_mixed_precision(self):
112
+ if is_torch_mps_available():
113
+ fp16, bf16 = False, False
114
+ elif self.torch_dtype in {torch.float16, torch.float32}:
115
+ fp16, bf16 = True, False
116
+ elif self.torch_dtype == torch.bfloat16:
117
+ fp16, bf16 = False, True
118
+ else:
119
+ raise ValueError(f'args.torch_dtype: {self.torch_dtype}')
120
+ if self.fp16 is None:
121
+ self.fp16 = fp16
122
+ if self.bf16 is None:
123
+ self.bf16 = bf16
124
+
125
+ def _init_rope_scaling(self):
126
+ assert self.max_length is not None, 'Use max_model_len together with rope_scaling'
127
+ rope_scaling = self.model_info.rope_scaling or {}
128
+ max_model_len = self.model_info.max_model_len
129
+ rope_scaling_factor = 1.0
130
+ if max_model_len:
131
+ rope_scaling_factor = max(float(math.ceil(self.max_length / max_model_len)), 1.0)
132
+ if rope_scaling:
133
+ rope_scaling_factor = max(rope_scaling.get('factor', -1), rope_scaling_factor)
134
+ rope_scaling['type'] = self.rope_scaling
135
+ rope_scaling['factor'] = rope_scaling_factor
136
+ else:
137
+ rope_scaling = {'type': self.rope_scaling, 'factor': rope_scaling_factor}
138
+ self.rope_scaling = rope_scaling
139
+ logger.info(f'rope_scaling is set to type: {self.rope_scaling}')
140
+
141
+ def _init_model_info(self) -> torch.dtype:
142
+ self.model_info, self.model_meta = get_model_info_meta(**self.get_model_kwargs())
143
+ self.task_type = self.model_info.task_type
144
+ self.num_labels = self.model_info.num_labels
145
+
146
+ self.model_dir = self.model_info.model_dir
147
+ self.model_type = self.model_info.model_type
148
+ if isinstance(self.rope_scaling, str):
149
+ self._init_rope_scaling()
150
+ return self.model_info.torch_dtype
151
+
152
+ def __post_init__(self):
153
+ if self.model is None:
154
+ raise ValueError(f'Please set --model <model_id_or_path>`, model: {self.model}')
155
+ self.model_suffix = get_model_name(self.model)
156
+ self._init_device_map()
157
+ self._init_max_memory()
158
+ self._init_torch_dtype()
159
+
160
+ def get_model_kwargs(self):
161
+ return {
162
+ 'model_id_or_path': self.model,
163
+ 'torch_dtype': self.torch_dtype,
164
+ 'model_type': self.model_type,
165
+ 'revision': self.model_revision,
166
+ 'use_hf': self.use_hf,
167
+ 'hub_token': self.hub_token,
168
+ 'local_repo_path': self.local_repo_path,
169
+ 'device_map': self.device_map,
170
+ 'max_memory': self.max_memory,
171
+ 'quantization_config': self.get_quantization_config(),
172
+ 'attn_impl': self.attn_impl,
173
+ 'rope_scaling': self.rope_scaling,
174
+ 'task_type': self.task_type,
175
+ 'num_labels': self.num_labels,
176
+ 'problem_type': self.problem_type,
177
+ 'init_strategy': self.init_strategy,
178
+ }
ms-swift/swift/llm/argument/base_args/quant_args.py ADDED
@@ -0,0 +1,91 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) Alibaba, Inc. and its affiliates.
2
+
3
+ from dataclasses import dataclass
4
+ from typing import Literal, Optional
5
+
6
+ import torch
7
+
8
+ from swift.llm import HfConfigFactory
9
+
10
+
11
+ @dataclass
12
+ class QuantizeArguments:
13
+ """
14
+ QuantizeArguments is a dataclass that holds the configuration for model quantization.
15
+
16
+ Args:
17
+ quant_method (Literal['bnb', 'hqq', 'eetq']): The quantization method to be used.
18
+ quant_bits (Literal[1, 2, 3, 4, 8]): The number of bits to use for quantization.
19
+ hqq_axis (Optional[int]): The axis for hqq quantization.
20
+ bnb_4bit_compute_dtype (Literal['float16', 'bfloat16', 'float32', None]):
21
+ The compute dtype for bnb 4-bit quantization.
22
+ bnb_4bit_quant_type (Literal['fp4', 'nf4']): The quantization type for bnb 4-bit quantization.
23
+ bnb_4bit_use_double_quant (bool): Whether to use double quantization for bnb 4-bit quantization.
24
+ bnb_4bit_quant_storage (Optional[str]): This sets the storage type to pack the quantized 4-bit params.
25
+ """
26
+ # awq, gptq, and aqlm need to be pre-quantized models.
27
+ # It can be detected automatically, without the need to pass in.
28
+ # while bnb, hqq, and eetq can be quantized during SFT using the original models.
29
+ quant_method: Literal['bnb', 'hqq', 'eetq', 'quanto'] = None
30
+ # bnb: 4,8; hqq: 1,2,3,4,8'; eetq: 8
31
+ # awq: 4; gptq: 2,3,4,8
32
+ quant_bits: Literal[1, 2, 3, 4, 8, 'float8'] = None
33
+ # hqq
34
+ hqq_axis: Optional[int] = None
35
+ # bnb
36
+ bnb_4bit_compute_dtype: Literal['float16', 'bfloat16', 'float32', None] = None
37
+ bnb_4bit_quant_type: Literal['fp4', 'nf4'] = 'nf4'
38
+ bnb_4bit_use_double_quant: bool = True
39
+ bnb_4bit_quant_storage: Optional[str] = None
40
+
41
+ def get_quantization_config(self):
42
+ if self.quant_method is None or self.quant_method in {'awq', 'gptq'}:
43
+ return None
44
+ assert self.quant_method in {'bnb', 'hqq', 'eetq', 'quanto'}
45
+ if self.quant_bits is None:
46
+ raise ValueError(f'Please set the quant_bits. args.quant_bits: {self.quant_bits}')
47
+ if self.quant_method == 'bnb':
48
+ if self.quant_bits == 4:
49
+ load_in_4bit, load_in_8bit = True, False
50
+ elif self.quant_bits == 8:
51
+ load_in_4bit, load_in_8bit = False, True
52
+ else:
53
+ raise ValueError(f'bnb not support quant_bits: {self.quant_bits}')
54
+
55
+ from transformers import BitsAndBytesConfig
56
+ quantization_config = BitsAndBytesConfig(
57
+ load_in_4bit=load_in_4bit,
58
+ load_in_8bit=load_in_8bit,
59
+ bnb_4bit_compute_dtype=self.bnb_4bit_compute_dtype,
60
+ bnb_4bit_quant_type=self.bnb_4bit_quant_type,
61
+ bnb_4bit_use_double_quant=self.bnb_4bit_use_double_quant,
62
+ bnb_4bit_quant_storage=self.bnb_4bit_quant_storage)
63
+ elif self.quant_method == 'hqq':
64
+ from transformers import HqqConfig
65
+ quantization_config = HqqConfig(nbits=self.quant_bits, axis=self.hqq_axis)
66
+ elif self.quant_method == 'quanto':
67
+ from transformers import QuantoConfig
68
+ if self.quant_bits == 8:
69
+ weights = 'int8'
70
+ elif self.quant_bits == 'float8':
71
+ weights = 'float8'
72
+ elif self.quant_bits == 4:
73
+ weights = 'int4'
74
+ elif self.quant_bits == 2:
75
+ weights = 'int2'
76
+ else:
77
+ raise ValueError('quanto quantization only support quant bits 2/4/8/float8')
78
+ quantization_config = QuantoConfig(weights=weights)
79
+ else: # 'eetq'
80
+ from transformers import EetqConfig
81
+ quantization_config = EetqConfig(f'int{self.quant_bits}')
82
+
83
+ return quantization_config
84
+
85
+ def __post_init__(self):
86
+ if self.bnb_4bit_compute_dtype is None:
87
+ if self.torch_dtype in {torch.float16, torch.float32}:
88
+ self.bnb_4bit_compute_dtype = torch.float32
89
+ elif self.torch_dtype == torch.bfloat16:
90
+ self.bnb_4bit_compute_dtype = torch.bfloat16
91
+ self.bnb_4bit_compute_dtype: torch.dtype = HfConfigFactory.to_torch_dtype(self.bnb_4bit_compute_dtype)
ms-swift/swift/llm/argument/deploy_args.py ADDED
@@ -0,0 +1,76 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) Alibaba, Inc. and its affiliates.
2
+ from dataclasses import dataclass
3
+ from typing import Optional
4
+
5
+ from swift.llm import safe_snapshot_download
6
+ from swift.utils import find_free_port, get_logger
7
+ from .infer_args import InferArguments
8
+
9
+ logger = get_logger()
10
+
11
+
12
+ @dataclass
13
+ class DeployArguments(InferArguments):
14
+ """
15
+ DeployArguments is a dataclass that extends InferArguments and is used to define
16
+ the arguments required for deploying a model.
17
+
18
+ Args:
19
+ host (str): The host address to bind the server to. Default is '0.0.0.0'.
20
+ port (int): The port number to bind the server to. Default is 8000.
21
+ api_key (Optional[str]): The API key for authentication. Default is None.
22
+ ssl_keyfile (Optional[str]): The path to the SSL key file. Default is None.
23
+ ssl_certfile (Optional[str]): The path to the SSL certificate file. Default is None.
24
+ owned_by (str): The owner of the deployment. Default is 'swift'.
25
+ served_model_name (Optional[str]): The name of the model being served. Default is None.
26
+ verbose (bool): Whether to log request information. Default is True.
27
+ log_interval (int): The interval for printing global statistics. Default is 20.
28
+ max_logprobs(int): Max number of logprobs to return
29
+ """
30
+ host: str = '0.0.0.0'
31
+ port: int = 8000
32
+ api_key: Optional[str] = None
33
+ ssl_keyfile: Optional[str] = None
34
+ ssl_certfile: Optional[str] = None
35
+
36
+ owned_by: str = 'swift'
37
+ served_model_name: Optional[str] = None
38
+ verbose: bool = True # Whether to log request_info
39
+ log_interval: int = 20 # Interval for printing global statistics
40
+
41
+ max_logprobs: int = 20
42
+
43
+ def __post_init__(self):
44
+ super().__post_init__()
45
+ self.port = find_free_port(self.port)
46
+
47
+ def _init_adapters(self):
48
+ if isinstance(self.adapters, str):
49
+ self.adapters = [self.adapters]
50
+ self.adapter_mapping = {}
51
+ adapters = []
52
+ for i, adapter in enumerate(self.adapters):
53
+ adapter_path = adapter.split('=')
54
+ if len(adapter_path) == 1:
55
+ adapter_path = (None, adapter_path[0])
56
+ adapter_name, adapter_path = adapter_path
57
+ adapter_path = safe_snapshot_download(adapter_path, use_hf=self.use_hf, hub_token=self.hub_token)
58
+ if adapter_name is None:
59
+ adapters.append(adapter_path)
60
+ else:
61
+ self.adapter_mapping[adapter_name] = adapter_path
62
+ self.adapters = adapters
63
+
64
+ def _init_ckpt_dir(self, adapters=None):
65
+ return super()._init_ckpt_dir(self.adapters + list(self.adapter_mapping.values()))
66
+
67
+ def _init_stream(self):
68
+ pass
69
+
70
+ def _init_eval_human(self):
71
+ pass
72
+
73
+ def _init_result_path(self, folder_name: str) -> None:
74
+ if folder_name == 'infer_result':
75
+ folder_name = 'deploy_result'
76
+ return super()._init_result_path(folder_name)
ms-swift/swift/llm/argument/export_args.py ADDED
@@ -0,0 +1,107 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) Alibaba, Inc. and its affiliates.
2
+ import os
3
+ from dataclasses import dataclass
4
+ from typing import Literal, Optional
5
+
6
+ import torch
7
+ import torch.distributed as dist
8
+
9
+ from swift.utils import get_logger, init_process_group, set_default_ddp_config
10
+ from .base_args import BaseArguments, to_abspath
11
+ from .merge_args import MergeArguments
12
+
13
+ logger = get_logger()
14
+
15
+
16
@dataclass
class ExportArguments(MergeArguments, BaseArguments):
    """
    ExportArguments is a dataclass that inherits from BaseArguments and MergeArguments.

    Args:
        output_dir (Optional[str]): Directory where the output will be saved.
        quant_method (Optional[str]): Quantization method: 'awq', 'gptq' or 'bnb'. Default is None.
        quant_n_samples (int): Number of samples for quantization.
        max_length (int): Sequence length for quantization.
        quant_batch_size (int): Batch size for quantization; -1 means no batching.
        group_size (int): Group size used by the quantizer.
        to_ollama (bool): Flag to indicate export model to ollama format.
        to_mcore (bool): Convert a checkpoint to Megatron-core format.
        to_hf (bool): Convert a Megatron-core checkpoint back to HF format.
        mcore_model (Optional[str]): Path to the Megatron-core model used in conversion.
        thread_count (Optional[int]): Thread count for the conversion.
        test_convert_precision (bool): Verify numerical precision after conversion.
        push_to_hub (bool): Flag to indicate if the output should be pushed to the model hub.
        hub_model_id (Optional[str]): Model ID for the hub.
        hub_private_repo (bool): Flag to indicate if the hub repository is private.
        commit_message (str): Commit message for pushing to the hub.
        to_peft_format (bool): Flag to indicate if the output should be in PEFT format.
            This argument is useless for now.
        exist_ok (bool): Allow `output_dir` to already exist instead of raising.
    """
    output_dir: Optional[str] = None

    # awq/gptq
    quant_method: Optional[Literal['awq', 'gptq', 'bnb']] = None
    quant_n_samples: int = 256
    max_length: int = 2048
    quant_batch_size: int = 1
    group_size: int = 128

    # ollama
    to_ollama: bool = False

    # megatron
    to_mcore: bool = False
    to_hf: bool = False
    mcore_model: Optional[str] = None
    thread_count: Optional[int] = None
    test_convert_precision: bool = False

    # push to ms hub
    push_to_hub: bool = False
    # 'user_name/repo_name' or 'repo_name'
    hub_model_id: Optional[str] = None
    hub_private_repo: bool = False
    commit_message: str = 'update files'
    # compat
    to_peft_format: bool = False
    exist_ok: bool = False

    def _init_output_dir(self):
        # Derive a default output directory next to the checkpoint, suffixed by
        # the kind of export being performed; an explicit --output_dir wins.
        if self.output_dir is None:
            ckpt_dir = self.ckpt_dir or f'./{self.model_suffix}'
            ckpt_dir, ckpt_name = os.path.split(ckpt_dir)
            if self.to_peft_format:
                suffix = 'peft'
            elif self.quant_method:
                # e.g. 'awq-int4'; quant_bits is validated in __post_init__.
                suffix = f'{self.quant_method}-int{self.quant_bits}'
            elif self.to_ollama:
                suffix = 'ollama'
            elif self.merge_lora:
                suffix = 'merged'
            elif self.to_mcore:
                suffix = 'mcore'
            elif self.to_hf:
                suffix = 'hf'
            else:
                # No export action selected: leave output_dir unset.
                return

            self.output_dir = os.path.join(ckpt_dir, f'{ckpt_name}-{suffix}')

        self.output_dir = to_abspath(self.output_dir)
        if not self.exist_ok and os.path.exists(self.output_dir):
            raise FileExistsError(f'args.output_dir: `{self.output_dir}` already exists.')
        logger.info(f'args.output_dir: `{self.output_dir}`')

    def __post_init__(self):
        # -1 is the CLI sentinel for "no batching".
        if self.quant_batch_size == -1:
            self.quant_batch_size = None
        # quant_bits / merge_lora / torch_dtype come from the parent argument
        # classes (presumably MergeArguments/BaseArguments — confirm there).
        if self.quant_bits and self.quant_method is None:
            raise ValueError('Please specify the quantization method using `--quant_method awq/gptq/bnb`.')
        if self.quant_method and self.quant_bits is None:
            raise ValueError('Please specify `--quant_bits`.')
        # awq/gptq quantizers run in fp16 unless the user chose a dtype.
        if self.quant_method in {'gptq', 'awq'} and self.torch_dtype is None:
            self.torch_dtype = torch.float16
        if self.to_mcore or self.to_hf:
            self.mcore_model = to_abspath(self.mcore_model, check_path_exist=True)
            # Megatron conversion runs under torch.distributed; set it up if needed.
            if not dist.is_initialized():
                set_default_ddp_config()
                init_process_group()

        BaseArguments.__post_init__(self)
        self._init_output_dir()
        # gptq/awq calibrate on real data, so a quant dataset is mandatory.
        if self.quant_method in {'gptq', 'awq'} and len(self.dataset) == 0:
            raise ValueError(f'self.dataset: {self.dataset}, Please input the quant dataset.')
ms-swift/swift/llm/argument/infer_args.py ADDED
@@ -0,0 +1,179 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) Alibaba, Inc. and its affiliates.
2
+ import datetime as dt
3
+ import os
4
+ from dataclasses import dataclass
5
+ from typing import Literal, Optional, Union
6
+
7
+ import torch.distributed as dist
8
+
9
+ from swift.utils import get_logger, init_process_group, is_dist
10
+ from .base_args import BaseArguments, to_abspath
11
+ from .base_args.model_args import ModelArguments
12
+ from .merge_args import MergeArguments
13
+
14
+ logger = get_logger()
15
+
16
+
17
@dataclass
class LmdeployArguments:
    """Configuration options forwarded to the lmdeploy inference engine.

    Args:
        tp (int): Tensor parallelism size. Default is 1.
        session_len (Optional[int]): The session length. Default is None.
        cache_max_entry_count (float): Maximum entry count for cache. Default is 0.8.
        quant_policy (int): Quantization policy, e.g. 4, 8. Default is 0.
        vision_batch_size (int): `max_batch_size` in VisionConfig. Default is 1.
    """

    # lmdeploy
    tp: int = 1
    session_len: Optional[int] = None
    cache_max_entry_count: float = 0.8
    quant_policy: int = 0  # e.g. 4, 8
    vision_batch_size: int = 1  # max_batch_size in VisionConfig

    def get_lmdeploy_engine_kwargs(self):
        """Collect the engine keyword arguments as a plain dict."""
        engine_kwargs = dict(
            tp=self.tp,
            session_len=self.session_len,
            cache_max_entry_count=self.cache_max_entry_count,
            quant_policy=self.quant_policy,
            vision_batch_size=self.vision_batch_size,
        )
        # Pin the engine to this rank's device under torch.distributed.
        if dist.is_initialized():
            engine_kwargs['devices'] = [dist.get_rank()]
        return engine_kwargs
48
+
49
+
50
@dataclass
class VllmArguments:
    """Configuration options forwarded to the vLLM inference engine.

    Args:
        gpu_memory_utilization (float): GPU memory utilization. Default is 0.9.
        tensor_parallel_size (int): Tensor parallelism size. Default is 1.
        pipeline_parallel_size (int): Pipeline parallelism size. Default is 1.
        max_num_seqs (int): Maximum number of sequences. Default is 256.
        max_model_len (Optional[int]): Maximum model length. Default is None.
        disable_custom_all_reduce (bool): Flag to disable custom all-reduce. Default is False.
        enforce_eager (bool): Flag to enforce eager execution. Default is False.
        limit_mm_per_prompt (Optional[str]): Limit multimedia per prompt. Default is None.
        vllm_max_lora_rank (int): Maximum LoRA rank. Default is 16.
        enable_prefix_caching (bool): Flag to enable automatic prefix caching. Default is False.
    """
    # vllm
    gpu_memory_utilization: float = 0.9
    tensor_parallel_size: int = 1
    pipeline_parallel_size: int = 1
    max_num_seqs: int = 256
    max_model_len: Optional[int] = None
    disable_custom_all_reduce: bool = False
    enforce_eager: bool = False
    limit_mm_per_prompt: Optional[Union[dict, str]] = None  # '{"image": 5, "video": 2}'
    vllm_max_lora_rank: int = 16
    enable_prefix_caching: bool = False
    use_async_engine: bool = True
    data_parallel_size: int = 1
    log_level: Literal['critical', 'error', 'warning', 'info', 'debug', 'trace'] = 'info'
    vllm_quantization: Optional[str] = None

    def __post_init__(self):
        # Accept a JSON string from the CLI and normalize it to a dict.
        self.limit_mm_per_prompt = ModelArguments.parse_to_dict(self.limit_mm_per_prompt)

    def get_vllm_engine_kwargs(self):
        """Collect the engine keyword arguments as a plain dict.

        Relies on `self.adapters` (and, when present, `self.adapter_mapping`)
        being provided by the class this mixin is combined with.
        """
        lora_adapters = list(self.adapters)
        if hasattr(self, 'adapter_mapping'):
            lora_adapters.extend(self.adapter_mapping.values())
        engine_kwargs = {
            'gpu_memory_utilization': self.gpu_memory_utilization,
            'tensor_parallel_size': self.tensor_parallel_size,
            'pipeline_parallel_size': self.pipeline_parallel_size,
            'max_num_seqs': self.max_num_seqs,
            'max_model_len': self.max_model_len,
            'disable_custom_all_reduce': self.disable_custom_all_reduce,
            'enforce_eager': self.enforce_eager,
            'limit_mm_per_prompt': self.limit_mm_per_prompt,
            'max_lora_rank': self.vllm_max_lora_rank,
            'enable_lora': len(lora_adapters) > 0,
            'max_loras': max(len(lora_adapters), 1),
            'enable_prefix_caching': self.enable_prefix_caching,
            'quantization': self.vllm_quantization,
        }
        if dist.is_initialized():
            engine_kwargs['device'] = dist.get_rank()
        return engine_kwargs
108
+
109
+
110
@dataclass
class InferArguments(MergeArguments, VllmArguments, LmdeployArguments, BaseArguments):
    """
    InferArguments is a dataclass that extends BaseArguments, MergeArguments, VllmArguments, and LmdeployArguments.
    It is used to define the arguments required for model inference.

    Args:
        ckpt_dir (Optional[str]): Directory to the checkpoint. Default is None.
        infer_backend (Literal): Backend to use for inference. Default is 'pt'.
            Allowed values are 'vllm', 'pt', 'lmdeploy'.
        result_path (Optional[str]): Directory to store inference results. Default is None.
        metric (Optional[str]): Evaluation metric, 'acc' or 'rouge'. Default is None.
        max_batch_size (int): Maximum batch size for the pt engine. Default is 1.
        ddp_backend (Optional[str]): torch.distributed backend for multi-process inference. Default is None.
        val_dataset_sample (Optional[int]): Sample size for validation dataset. Default is None.
    """
    infer_backend: Literal['vllm', 'pt', 'lmdeploy'] = 'pt'

    result_path: Optional[str] = None
    metric: Optional[Literal['acc', 'rouge']] = None
    # for pt engine
    max_batch_size: int = 1
    ddp_backend: Optional[str] = None

    # only for inference
    val_dataset_sample: Optional[int] = None

    def _get_result_path(self, folder_name: str) -> str:
        # Results live under the checkpoint dir when available, otherwise
        # under ./result/<model>; one timestamped jsonl file per run.
        result_dir = self.ckpt_dir or f'result/{self.model_suffix}'
        os.makedirs(result_dir, exist_ok=True)
        result_dir = to_abspath(os.path.join(result_dir, folder_name))
        os.makedirs(result_dir, exist_ok=True)
        time = dt.datetime.now().strftime('%Y%m%d-%H%M%S')
        return os.path.join(result_dir, f'{time}.jsonl')

    def _init_result_path(self, folder_name: str) -> None:
        # An explicit --result_path wins; otherwise derive a timestamped one.
        if self.result_path is not None:
            self.result_path = to_abspath(self.result_path)
            return
        self.result_path = self._get_result_path(folder_name)
        logger.info(f'args.result_path: {self.result_path}')

    def _init_stream(self):
        # NOTE(review): __post_init__ calls _init_eval_human() before this
        # method, so this assignment overrides the value it computed — confirm
        # this precedence is intended.
        self.eval_human = not (self.dataset and self.split_dataset_ratio > 0 or self.val_dataset)

        # Beam search yields whole sequences, so token streaming is disabled.
        if self.stream and self.num_beams != 1:
            self.stream = False
            logger.info('Setting args.stream: False')

    def _init_ddp(self):
        if not is_dist():
            return
        # Interactive input and streaming output are incompatible with DDP.
        assert not self.eval_human and not self.stream, (
            f'args.eval_human: {self.eval_human}, args.stream: {self.stream}')
        self._init_device()
        init_process_group(self.ddp_backend)

    def __post_init__(self) -> None:
        BaseArguments.__post_init__(self)
        VllmArguments.__post_init__(self)
        self._init_result_path('infer_result')
        self._init_eval_human()
        self._init_stream()
        self._init_ddp()

    def _init_eval_human(self):
        # Fall back to interactive (human) evaluation when no dataset is given.
        if len(self.dataset) == 0 and len(self.val_dataset) == 0:
            eval_human = True
        else:
            eval_human = False
        self.eval_human = eval_human
        logger.info(f'Setting args.eval_human: {self.eval_human}')
ms-swift/swift/llm/argument/train_args.py ADDED
@@ -0,0 +1,234 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) Alibaba, Inc. and its affiliates.
2
+ import os
3
+ from dataclasses import dataclass, field
4
+ from typing import Literal, Optional
5
+
6
+ from transformers import Seq2SeqTrainingArguments
7
+ from transformers.utils.versions import require_version
8
+
9
+ from swift.plugin import LOSS_MAPPING
10
+ from swift.trainers import TrainerFactory
11
+ from swift.trainers.arguments import TrainArgumentsMixin
12
+ from swift.utils import (add_version_to_work_dir, get_device_count, get_logger, get_pai_tensorboard_dir, is_master,
13
+ is_mp, is_pai_training_job, is_swanlab_available)
14
+ from .base_args import BaseArguments, to_abspath
15
+ from .tuner_args import TunerArguments
16
+
17
+ logger = get_logger()
18
+
19
+
20
+ @dataclass
21
+ class Seq2SeqTrainingOverrideArguments(TrainArgumentsMixin, Seq2SeqTrainingArguments):
22
+ """Override the default value in `Seq2SeqTrainingArguments`"""
23
+ output_dir: Optional[str] = None
24
+ learning_rate: Optional[float] = None
25
+ eval_strategy: Optional[str] = None # steps, epoch
26
+ fp16: Optional[bool] = None
27
+ bf16: Optional[bool] = None
28
+
29
+ def _init_output_dir(self):
30
+ if self.output_dir is None:
31
+ self.output_dir = f'output/{self.model_suffix}'
32
+ self.output_dir = to_abspath(self.output_dir)
33
+
34
+ def _init_eval_strategy(self):
35
+ if self.eval_strategy is None:
36
+ self.eval_strategy = self.save_strategy
37
+ if self.eval_strategy == 'no':
38
+ self.eval_steps = None
39
+ self.split_dataset_ratio = 0.
40
+ logger.info(f'Setting args.split_dataset_ratio: {self.split_dataset_ratio}')
41
+ elif self.eval_strategy == 'steps' and self.eval_steps is None:
42
+ self.eval_steps = self.save_steps
43
+ self.evaluation_strategy = self.eval_strategy
44
+
45
+ def _init_metric_for_best_model(self):
46
+ if self.metric_for_best_model is None:
47
+ self.metric_for_best_model = 'rouge-l' if self.predict_with_generate else 'loss'
48
+
49
+ def __post_init__(self):
50
+ self._init_output_dir()
51
+ self._init_metric_for_best_model()
52
+ if self.greater_is_better is None and self.metric_for_best_model is not None:
53
+ self.greater_is_better = 'loss' not in self.metric_for_best_model
54
+
55
+ if self.learning_rate is None:
56
+ if self.train_type == 'full':
57
+ self.learning_rate = 1e-5
58
+ else:
59
+ self.learning_rate = 1e-4
60
+ self._init_eval_strategy()
61
+
62
+
63
+ @dataclass
64
+ class SwanlabArguments:
65
+
66
+ swanlab_token: Optional[str] = None
67
+ swanlab_project: Optional[str] = None
68
+ swanlab_workspace: Optional[str] = None
69
+ swanlab_exp_name: Optional[str] = None
70
+ swanlab_mode: Literal['cloud', 'local'] = 'cloud'
71
+
72
+ def _init_swanlab(self):
73
+ if not is_swanlab_available():
74
+ raise ValueError('You are using swanlab as `report_to`, please install swanlab by ' '`pip install swanlab`')
75
+ if not self.swanlab_exp_name:
76
+ self.swanlab_exp_name = self.output_dir
77
+ from transformers.integrations import INTEGRATION_TO_CALLBACK
78
+ import swanlab
79
+ from swanlab.integration.transformers import SwanLabCallback
80
+ if self.swanlab_token:
81
+ swanlab.login(self.swanlab_token)
82
+ INTEGRATION_TO_CALLBACK['swanlab'] = SwanLabCallback(
83
+ project=self.swanlab_project,
84
+ workspace=self.swanlab_workspace,
85
+ experiment_name=self.swanlab_exp_name,
86
+ config={'UPPERFRAME': '🐦‍⬛ms-swift'},
87
+ mode=self.swanlab_mode,
88
+ )
89
+
90
+
91
+ @dataclass
92
+ class TrainArguments(SwanlabArguments, TunerArguments, Seq2SeqTrainingOverrideArguments, BaseArguments):
93
+ """
94
+ TrainArguments class is a dataclass that inherits from multiple argument classes:
95
+ TunerArguments, Seq2SeqTrainingOverrideArguments, and BaseArguments.
96
+
97
+ Args:
98
+ add_version (bool): Flag to add version information to output_dir. Default is True.
99
+ resume_only_model (bool): Flag to resume training only the model. Default is False.
100
+ loss_type (Optional[str]): Type of loss function to use. Default is None.
101
+ packing (bool): Flag to enable packing of datasets. Default is False.
102
+ lazy_tokenize (Optional[bool]): Flag to enable lazy tokenization. Default is None.
103
+ max_new_tokens (int): Maximum number of new tokens to generate. Default is 64.
104
+ temperature (float): Temperature for sampling. Default is 0.
105
+ optimizer (Optional[str]): Optimizer type to use, define it in the plugin package. Default is None.
106
+ metric (Optional[str]): Metric to use for evaluation, define it in the plugin package. Default is None.
107
+ """
108
+ add_version: bool = True
109
+ resume_only_model: bool = False
110
+ create_checkpoint_symlink: bool = False
111
+
112
+ # dataset
113
+ packing: bool = False
114
+ lazy_tokenize: Optional[bool] = None
115
+
116
+ # plugin
117
+ loss_type: Optional[str] = field(default=None, metadata={'help': f'loss_func choices: {list(LOSS_MAPPING.keys())}'})
118
+ optimizer: Optional[str] = None
119
+ metric: Optional[str] = None
120
+
121
+ # extra
122
+ max_new_tokens: int = 64
123
+ temperature: float = 0.
124
+ load_args: bool = False
125
+
126
+ # zero++
127
+ zero_hpz_partition_size: Optional[int] = None
128
+
129
+ def _init_lazy_tokenize(self):
130
+ if self.streaming and self.lazy_tokenize:
131
+ self.lazy_tokenize = False
132
+ logger.warning('Streaming and lazy_tokenize are incompatible. '
133
+ f'Setting args.lazy_tokenize: {self.lazy_tokenize}.')
134
+ if self.lazy_tokenize is None:
135
+ self.lazy_tokenize = self.model_meta.is_multimodal and not self.streaming
136
+ logger.info(f'Setting args.lazy_tokenize: {self.lazy_tokenize}')
137
+
138
+ def __post_init__(self) -> None:
139
+ if self.packing and self.attn_impl != 'flash_attn':
140
+ raise ValueError('The "packing" feature needs to be used in conjunction with "flash_attn". '
141
+ 'Please specify `--attn_impl flash_attn`.')
142
+ if self.resume_from_checkpoint:
143
+ self.resume_from_checkpoint = to_abspath(self.resume_from_checkpoint, True)
144
+ if self.resume_only_model:
145
+ if self.train_type == 'full':
146
+ self.model = self.resume_from_checkpoint
147
+ else:
148
+ self.adapters = [self.resume_from_checkpoint]
149
+ BaseArguments.__post_init__(self)
150
+ Seq2SeqTrainingOverrideArguments.__post_init__(self)
151
+ TunerArguments.__post_init__(self)
152
+
153
+ if self.optimizer is None:
154
+ if self.lorap_lr_ratio:
155
+ self.optimizer = 'lorap'
156
+ elif self.use_galore:
157
+ self.optimizer = 'galore'
158
+
159
+ if len(self.dataset) == 0:
160
+ raise ValueError(f'self.dataset: {self.dataset}, Please input the training dataset.')
161
+
162
+ self._handle_pai_compat()
163
+
164
+ self._init_deepspeed()
165
+ self._init_device()
166
+ self._init_lazy_tokenize()
167
+
168
+ if getattr(self, 'accelerator_config', None) is None:
169
+ self.accelerator_config = {'dispatch_batches': False}
170
+ self.training_args = TrainerFactory.get_training_args(self)
171
+ self.training_args.remove_unused_columns = False
172
+
173
+ self._add_version()
174
+
175
+ if 'swanlab' in self.report_to:
176
+ self._init_swanlab()
177
+
178
+ def _init_deepspeed(self):
179
+ if self.deepspeed:
180
+ require_version('deepspeed')
181
+ if is_mp():
182
+ raise ValueError('DeepSpeed is not compatible with `device_map`. '
183
+ f'n_gpu: {get_device_count()}, '
184
+ f'local_world_size: {self.local_world_size}.')
185
+
186
+ ds_config_folder = os.path.abspath(os.path.join(os.path.dirname(__file__), '..', 'ds_config'))
187
+ deepspeed_mapping = {
188
+ name: f'{name}.json'
189
+ for name in ['zero0', 'zero1', 'zero2', 'zero3', 'zero2_offload', 'zero3_offload']
190
+ }
191
+ for ds_name, ds_config in deepspeed_mapping.items():
192
+ if self.deepspeed == ds_name:
193
+ self.deepspeed = os.path.join(ds_config_folder, ds_config)
194
+ break
195
+
196
+ self.deepspeed = self.parse_to_dict(self.deepspeed)
197
+ if self.zero_hpz_partition_size is not None:
198
+ assert 'zero_optimization' in self.deepspeed
199
+ self.deepspeed['zero_optimization']['zero_hpz_partition_size'] = self.zero_hpz_partition_size
200
+ logger.warn('If `zero_hpz_partition_size`(ZeRO++) causes grad_norm NaN, please'
201
+ ' try `--torch_dtype float16`')
202
+ logger.info(f'Using deepspeed: {self.deepspeed}')
203
+
204
+ def _handle_pai_compat(self) -> None:
205
+ if not is_pai_training_job():
206
+ return
207
+
208
+ logger.info('Handle pai compat...')
209
+ pai_tensorboard_dir = get_pai_tensorboard_dir()
210
+ if self.logging_dir is None and pai_tensorboard_dir is not None:
211
+ self.logging_dir = pai_tensorboard_dir
212
+ logger.info(f'Setting args.logging_dir: {self.logging_dir}')
213
+ self.add_version = False
214
+ logger.info(f'Setting args.add_version: {self.add_version}')
215
+
216
+ def _add_version(self):
217
+ """Prepare the output_dir"""
218
+ if self.add_version:
219
+ self.output_dir = add_version_to_work_dir(self.output_dir)
220
+ logger.info(f'output_dir: {self.output_dir}')
221
+
222
+ if self.logging_dir is None:
223
+ self.logging_dir = f'{self.output_dir}/runs'
224
+
225
+ self.logging_dir = to_abspath(self.logging_dir)
226
+ if is_master():
227
+ os.makedirs(self.output_dir, exist_ok=True)
228
+
229
+ if self.run_name is None:
230
+ self.run_name = self.output_dir
231
+
232
+ self.training_args.output_dir = self.output_dir
233
+ self.training_args.run_name = self.run_name
234
+ self.training_args.logging_dir = self.logging_dir
ms-swift/swift/llm/argument/tuner_args.py ADDED
@@ -0,0 +1,222 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) Alibaba, Inc. and its affiliates.
2
+ from dataclasses import dataclass, field
3
+ from typing import List, Literal, Optional
4
+
5
+ from transformers.utils import strtobool
6
+
7
+ from swift.llm import get_model_arch
8
+ from swift.utils import get_logger
9
+
10
+ logger = get_logger()
11
+
12
+
13
@dataclass
class TunerArguments:
    """
    TunerArguments is a dataclass that holds configuration for various tuners.

    Args:
        target_modules (List[str]): List of target modules for tuning. Default is ['all-linear'].
        target_regex (Optional[str]): Regular expression to match target modules. Default is None.
        modules_to_save (List[str]): List of modules to save. Default is an empty list.

        lora_rank (int): Rank for LoRA. Default is 8.
        lora_alpha (int): Alpha value for LoRA. Default is 32.
        lora_dropout (float): Dropout rate for LoRA. Default is 0.05.
        lora_bias (Literal): Bias handling for LoRA. Default is 'none'.
            Allowed values are 'none', 'all'.
        lora_dtype (Optional[Literal]): Data type for LoRA parameters. Default is None.
            Allowed values are 'float16', 'bfloat16', 'float32', None.
        lorap_lr_ratio (Optional[float]): Learning rate ratio for LoRA+. Default is None.
        use_rslora (bool): Flag to indicate if RSLora is used. Default is False.
        use_dora (bool): Flag to indicate if Dora is used. Default is False.
        init_weights (str): Initialization method for weights of supported tuners. Default is 'true'.
        lora_ga_batch_size (int): Batch size used for estimating gradients during initialization in LoRA-GA.
            Default value is 2.
        lora_ga_iters (int): Number of iterations for estimating gradients during initialization in LoRA-GA.
            Default value is 2.
        lora_ga_max_length (int): Maximum input length for estimating gradients during initialization in LoRA-GA.
            Default value is 1024.
        lora_ga_direction (str): Initial direction used for gradient estimation during initialization in LoRA-GA.
            Default value is `ArB2r`. Allowed: `ArBr`, `A2rBr`, `ArB2r`, and `random`.
        lora_ga_scale (str): The scaling method for initialization in LoRA-GA.
            Default value is `stable`. Allowed values are: `gd`, `unit`, `stable`, and `weightS`.
        lora_ga_stable_gamma (int): The gamma value when choosing `stable` scaling for initialization.
            Default value is 16.

        fourier_n_frequency (int): Number of frequencies for FourierFT. Default is 2000.
        fourier_scaling (float): Scaling factor for FourierFT. Default is 300.0.

        boft_block_size (int): Block size for BOFT. Default is 4.
        boft_block_num (int): Number of blocks for BOFT. Default is 0.
        boft_n_butterfly_factor (int): Butterfly factor for BOFT. Default is 1.
        boft_dropout (float): Dropout rate for BOFT. Default is 0.0.

        vera_rank (int): Rank for Vera. Default is 256.
        vera_projection_prng_key (int): PRNG key for Vera projection. Default is 0.
        vera_dropout (float): Dropout rate for Vera. Default is 0.0.
        vera_d_initial (float): Initial value for Vera D. Default is 0.1.

        adapter_act (str): Activation function for adapter. Default is 'gelu'.
        adapter_length (int): Length of the adapter. Default is 128.

        use_galore (bool): Flag to indicate if Galore is used. Default is False.
        galore_target_modules (Optional[List[str]]): List of target modules for Galore. Default is None.
        galore_rank (int): Rank for Galore. Default is 128.
        galore_update_proj_gap (int): Update projection gap for Galore. Default is 50.
        galore_scale (float): Scaling factor for Galore. Default is 1.0.
        galore_proj_type (str): Projection type for Galore. Default is 'std'.
        galore_optim_per_parameter (bool): Flag to indicate if optimization is per parameter for Galore.
            Default is False.
        galore_with_embedding (bool): Flag to indicate if embedding is used with Galore. Default is False.
        galore_quantization (bool): Flag to indicate if use Q-Galore. Default is False.
        galore_proj_quant (bool): Flag to indicate if projection quantization is used for Galore. Default is False.
        galore_proj_bits (int): Number of bits for projection quantization. Default is 4.
        galore_proj_group_size (int): Group size for projection quantization. Default is 256.
        galore_cos_threshold (float): Cosine threshold for projection quantization. Default is 0.4.
        galore_gamma_proj (int): Gamma for projection quantization. Default is 2.
        galore_queue_size (int): Queue size for projection quantization. Default is 5.

        adalora_target_r (int): Target rank for AdaLoRA. Default is 8.
        adalora_init_r (int): Initial rank for AdaLoRA. Default is 12.
        adalora_tinit (int): Initial T value for AdaLoRA. Default is 0.
        adalora_tfinal (int): Final T value for AdaLoRA. Default is 0.
        adalora_deltaT (int): Delta T value for AdaLoRA. Default is 1.
        adalora_beta1 (float): Beta1 value for AdaLoRA. Default is 0.85.
        adalora_beta2 (float): Beta2 value for AdaLoRA. Default is 0.85.
        adalora_orth_reg_weight (float): Orthogonal regularization weight for AdaLoRA. Default is 0.5.

        llamapro_num_new_blocks (int): Number of new blocks for LLaMAPro. Default is 4.
        llamapro_num_groups (Optional[int]): Number of groups for LLaMAPro. Default is None.

        lisa_activated_layers (int): Number of activated layers for LISA. Default is 0.
        lisa_step_interval (int): Step interval for LISA activation. Default is 20.

        reft_layer_key (Optional[str]): Key identifier for ReFT layer. Default is None.
        reft_layers (Optional[List[int]]): List of layers involved in ReFT. Default is None.
        reft_rank (int): Rank parameter for ReFT. Default is 4.
        reft_intervention_type (Literal): Type of intervention for ReFT. Default is 'LoreftIntervention'.
        reft_args (Optional[str]): Additional arguments for ReFT. Default is None.
    """
    # full
    freeze_parameters: List[str] = field(default_factory=list)
    freeze_parameters_regex: Optional[str] = None
    freeze_parameters_ratio: float = 0.  # 0 ~ 1
    trainable_parameters: List[str] = field(default_factory=list)
    trainable_parameters_regex: Optional[str] = None
    # lora or full
    freeze_llm: bool = False
    freeze_vit: bool = True
    freeze_aligner: bool = True
    # tuners
    target_modules: List[str] = field(default_factory=lambda: ['all-linear'])
    target_regex: Optional[str] = None
    # e.g. ['wte', 'ln_1', 'ln_2', 'ln_f', 'lm_head']
    modules_to_save: List[str] = field(default_factory=list)

    # lora
    lora_rank: int = 8
    lora_alpha: int = 32
    lora_dropout: float = 0.05
    lora_bias: Literal['none', 'all'] = 'none'
    lora_dtype: Literal['float16', 'bfloat16', 'float32', None] = None
    lorap_lr_ratio: Optional[float] = None
    use_rslora: bool = False
    use_dora: bool = False
    # Lora: Literal['gaussian', 'pissa', 'pissa_niter_[number of iters]', 'olora', 'loftq', 'true', 'false', 'lora-ga']
    lora_ga_batch_size: int = 2
    lora_ga_iters: int = 2
    lora_ga_max_length: int = 1024
    lora_ga_direction: str = 'ArB2r'
    lora_ga_scale: str = 'stable'
    lora_ga_stable_gamma: int = 16

    # Bone: Literal['bat', 'true', 'false']
    init_weights: str = 'true'

    # fourierft
    fourier_n_frequency: int = 2000
    fourier_scaling: float = 300.0

    # BOFT
    boft_block_size: int = 4
    boft_block_num: int = 0
    boft_n_butterfly_factor: int = 1
    boft_dropout: float = 0.0

    # Vera
    vera_rank: int = 256
    vera_projection_prng_key: int = 0
    vera_dropout: float = 0.0
    vera_d_initial: float = 0.1

    # adapter
    adapter_act: str = 'gelu'
    adapter_length: int = 128

    # galore
    use_galore: bool = False
    galore_target_modules: Optional[List[str]] = None
    galore_rank: int = 128
    galore_update_proj_gap: int = 50
    galore_scale: float = 1.0
    galore_proj_type: str = 'std'
    galore_optim_per_parameter: bool = False
    galore_with_embedding: bool = False
    galore_quantization: bool = False
    galore_proj_quant: bool = False
    galore_proj_bits: int = 4
    galore_proj_group_size: int = 256
    galore_cos_threshold: float = 0.4
    galore_gamma_proj: int = 2
    galore_queue_size: int = 5

    # adalora
    adalora_target_r: int = 8
    adalora_init_r: int = 12
    adalora_tinit: int = 0
    adalora_tfinal: int = 0
    adalora_deltaT: int = 1
    adalora_beta1: float = 0.85
    adalora_beta2: float = 0.85
    adalora_orth_reg_weight: float = 0.5

    # llamapro
    llamapro_num_new_blocks: int = 4
    llamapro_num_groups: Optional[int] = None

    # lisa
    lisa_activated_layers: int = 0
    lisa_step_interval: int = 20

    # reft
    reft_layer_key: Optional[str] = None
    reft_layers: Optional[List[int]] = None
    reft_rank: int = 4
    reft_intervention_type: Literal['NoreftIntervention', 'LoreftIntervention', 'ConsreftIntervention',
                                    'LobireftIntervention', 'DireftIntervention',
                                    'NodireftIntervention'] = 'LoreftIntervention'
    reft_args: Optional[str] = None

    def __post_init__(self):
        # 'true'/'false' from the CLI become real booleans; other strings
        # (e.g. 'gaussian', 'pissa', 'bat') pass through unchanged.
        if isinstance(self.init_weights, str) and self.init_weights.lower() in {'true', 'false'}:
            self.init_weights = bool(strtobool(self.init_weights))
        self._init_multimodal_full()
        # A regex, when given, takes precedence over the module name list.
        if self.target_regex:
            self.target_modules = self.target_regex

    def _init_multimodal_full(self):
        # Translate the freeze_llm/vit/aligner switches into concrete
        # parameter-name lists using the model architecture description.
        # `model_meta` is provided by the class this mixin is combined with.
        model_arch = get_model_arch(self.model_meta.model_arch)
        if not self.model_meta.is_multimodal or not model_arch:
            return
        if self.freeze_llm:
            self.freeze_parameters += model_arch.language_model
        if self.freeze_vit:
            self.freeze_parameters += model_arch.vision_tower
        if self.freeze_aligner:
            self.freeze_parameters += model_arch.aligner
        else:
            self.trainable_parameters += model_arch.aligner
        # The generator head is always frozen for multimodal full-training.
        self.freeze_parameters += model_arch.generator
        if self.freeze_parameters:
            logger.info(f'freeze_parameters: {self.freeze_parameters}')
        if self.trainable_parameters:
            logger.info(f'additional trainable_parameters: {self.trainable_parameters}')
ms-swift/swift/llm/dataset/__pycache__/__init__.cpython-310.pyc ADDED
Binary file (1.41 kB). View file
 
ms-swift/swift/llm/dataset/dataset/mllm.py ADDED
@@ -0,0 +1,1215 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) Alibaba, Inc. and its affiliates.
2
+ import ast
3
+ import os
4
+ from typing import Any, Dict, Optional
5
+
6
+ import numpy as np
7
+ from datasets import Dataset as HfDataset
8
+ from datasets import IterableDataset as HfIterableDataset
9
+ from tqdm import tqdm
10
+
11
+ from ..media import MediaResource
12
+ from ..preprocessor import GroundingMixin, MessagesPreprocessor, ResponsePreprocessor, RowPreprocessor
13
+ from ..register import DatasetMeta, SubsetDataset, register_dataset
14
+
15
+
16
class ShareGPT4oPreprocessor(MessagesPreprocessor):
    """Resolve ShareGPT-4o image references against the downloaded archive.

    Rows with no image reference, or whose image file is absent on disk,
    are dropped by returning None.
    """

    def preprocess(self, row: Dict[str, Any]) -> Optional[Dict[str, Any]]:
        row = super().preprocess(row)
        rel_path = row['images']
        if not rel_path:
            return None
        abs_path = os.path.join(self.prefix_path, rel_path)
        if not os.path.exists(abs_path):
            return None
        row['images'] = [abs_path]
        return row

    def prepare_dataset(self, dataset):
        # Download the image archive once and remember where the images live.
        url = ('https://www.modelscope.cn/api/v1/datasets/AI-ModelScope/ShareGPT-4o/repo?'
               'Revision=master&FilePath=images.zip')
        local_dir = MediaResource.download(url, 'sharegpt_4o_images')
        self.prefix_path = os.path.join(local_dir, 'mnt', 'petrelfs', 'wangwenhai', 'workspace_cef', '4o', 'image')
        return super().prepare_dataset(dataset)
35
+
36
+
37
# ShareGPT-4o: multimodal VQA/caption data; images resolved by the preprocessor.
register_dataset(
    DatasetMeta(
        ms_dataset_id='AI-ModelScope/ShareGPT-4o',
        hf_dataset_id='OpenGVLab/ShareGPT-4o',
        preprocess_func=ShareGPT4oPreprocessor(),
        subsets=['image_caption'],
        split=['images'],
        tags=['vqa', 'multi-modal'],
    ))
46
+
47
+
48
class GPT4vDataset(ResponsePreprocessor):
    """Caption dataset: every row is paired with a fixed caption question."""

    def preprocess(self, row: Dict[str, Any]) -> Dict[str, Any]:
        caption_query = 'What is the caption of this image?'
        row['query'] = caption_query
        return super().preprocess(row)
53
+
54
+
55
# laion gpt4v: English caption data (huge); RLAIF-V: DPO preference pairs.
register_dataset(
    DatasetMeta(
        ms_dataset_id='swift/gpt4v-dataset',
        hf_dataset_id='laion/gpt4v-dataset',
        preprocess_func=GPT4vDataset(columns={
            'link': 'images',
            'caption': 'response'
        }),
        split=['train'],
        tags=['en', 'caption', 'multi-modal', 'quality'],
        huge_dataset=True,
    ))

register_dataset(
    DatasetMeta(
        ms_dataset_id='swift/RLAIF-V-Dataset',
        hf_dataset_id='openbmb/RLAIF-V-Dataset',
        preprocess_func=ResponsePreprocessor(columns={
            'question': 'query',
            'chosen': 'response',
            'rejected': 'rejected_response'
        }),
        tags=['rlhf', 'dpo', 'multi-modal', 'en'],
    ))
79
+
80
+
81
class GarbagePreprocessor(ResponsePreprocessor):
    """Waste-classification rows: uses a fixed classification instruction as
    the query; the label column is mapped at registration time."""

    def preprocess(self, row: Dict[str, Any]) -> Dict[str, Any]:
        row['query'] = 'Task: Classify household waste.'
        return super().preprocess(row)
86
+
87
+
88
# garbage265: image classification dataset for household waste.
register_dataset(
    DatasetMeta(
        ms_dataset_id='tany0699/garbage265',
        preprocess_func=GarbagePreprocessor(columns={
            'category': 'label',
            'image:FILE': 'images'
        }),
        tags=['cls', '🔥', 'multi-modal'],
    ))
97
+
98
+
99
class SA1BPairedCaptionPreprocessor(RowPreprocessor):
    """Build a single-turn chat from an SA1B global caption, using a randomly
    chosen Chinese prompt as the user query."""

    def preprocess(self, row: Dict[str, Any]) -> Dict[str, Any]:
        prompt = ['图片中展示了什么', '讲述一下图片中内容', '告诉我里面有什么', '图片内容是啥']
        user_turn = {'role': 'user', 'content': np.random.choice(prompt)}
        assistant_turn = {'role': 'assistant', 'content': row['global_caption']}
        return {'messages': [user_turn, assistant_turn]}
114
+
115
+
116
# SA1B paired captions: Chinese image-caption VQA.
register_dataset(
    DatasetMeta(
        ms_dataset_id='Tongyi-DataEngine/SA1B-Paired-Captions-Images',
        preprocess_func=SA1BPairedCaptionPreprocessor(columns={
            'opensource_url': 'images',
        }),
        tags=['zh', 'multi-modal', 'vqa'],
    ))
124
+
125
+
126
class SA1BDenseCaptionPreprocessor(RowPreprocessor):
    """Parse the SA1B dense-caption field and emit a single-turn chat."""

    column_mapping = {
        'url': 'images',
    }

    def preprocess(self, row: Dict[str, Any]) -> Dict[str, Any]:
        prompt = ['图片中展示了什么', '讲述一下图片中内容', '告诉我里面有什么', '图片内容是啥']
        # 'cap_seg' is a stringified Python dict; literal_eval parses it safely.
        cap_seg = ast.literal_eval(row['cap_seg'])
        user_turn = {'role': 'user', 'content': np.random.choice(prompt)}
        assistant_turn = {'role': 'assistant', 'content': cap_seg.get('global_caption')}
        return {'messages': [user_turn, assistant_turn]}
145
+
146
+
147
# SA1B dense captions: Chinese VQA, very large dataset.
register_dataset(
    DatasetMeta(
        ms_dataset_id='Tongyi-DataEngine/SA1B-Dense-Caption',
        preprocess_func=SA1BDenseCaptionPreprocessor(columns={
            'url': 'images',
        }),
        tags=['zh', 'multi-modal', 'vqa'],
        huge_dataset=True,
    ))
156
+
157
+
158
class COCO2014Preprocess(ResponsePreprocessor):
    """COCO-2014 caption rows: keep only the first '&&'-separated caption and
    pair it with a fixed describe-the-image query."""

    def preprocess(self, row: Dict[str, Any]) -> Dict[str, Any]:
        # split('&&')[0] is a no-op when the delimiter is absent.
        row['response'] = row['caption'].split('&&')[0]
        row['query'] = 'please describe the image.'
        return super().preprocess(row)
168
+
169
+
170
# COCO 2014 caption dataset with train/validation subsets.
register_dataset(
    DatasetMeta(
        ms_dataset_id='modelscope/coco_2014_caption',
        preprocess_func=COCO2014Preprocess(),
        subsets=[
            SubsetDataset('train', 'coco_2014_caption', ['train']),
            SubsetDataset('validation', 'coco_2014_caption', ['validation']),
        ],
        tags=['chat', 'multi-modal', 'vision', '🔥'],
    ))
180
+
181
+
182
class MantisPreprocessor(MessagesPreprocessor):
    """Mantis-Instruct preprocessor: downloads the per-subset image archive
    and drops rows whose images are missing on disk."""

    def __init__(self, *, subset: str, columns: Optional[Dict[str, str]] = None) -> None:
        self.subset = subset
        super().__init__(columns=columns)

    def prepare_dataset(self, dataset: HfDataset) -> HfDataset:
        url = (f'https://www.modelscope.cn/api/v1/datasets/swift/Mantis-Instruct/repo?Revision='
               f'master&FilePath={self.subset}/train_images.zip')  # noqa
        self.local_dir = MediaResource.download(url, f'mantis_{self.subset}')
        return super().prepare_dataset(dataset)

    def preprocess(self, row: Dict[str, Any]) -> Optional[Dict[str, Any]]:
        paths = [os.path.join(self.local_dir, item['path']) for item in row['images']]
        # Keep the row only when it has images and every one exists locally.
        if not paths or not all(os.path.exists(path) for path in paths):
            return None
        row['images'] = paths
        return super().preprocess(row)
203
+
204
+
205
# One SubsetDataset (with its own image archive) per Mantis-Instruct subset.
mantis_subsets_name = [
    'birds-to-words', 'chartqa', 'coinstruct', 'contrastive_caption', 'docvqa', 'dreamsim', 'dvqa', 'iconqa',
    'imagecode', 'llava_665k_multi', 'lrv_multi', 'multi_vqa', 'nextqa', 'nlvr2', 'spot-the-diff', 'star',
    'visual_story_telling'
]

_mantis_subsets = []
for subset in mantis_subsets_name:
    _subset = SubsetDataset(subset=subset, split=['train'], preprocess_func=MantisPreprocessor(subset=subset))
    _mantis_subsets.append(_subset)

register_dataset(
    DatasetMeta(
        ms_dataset_id='swift/Mantis-Instruct',
        subsets=_mantis_subsets,
        tags=['chat', 'multi-modal', 'vision'],
    ))
222
+
223
+
224
class LLaVADataPreprocessor(MessagesPreprocessor):
    """llava-data preprocessor: downloads the referenced media collections and
    rewrites each row's image paths to local absolute paths."""

    def prepare_dataset(self, dataset):
        self.all_folders = {}
        for media_type in ['coco', 'gqa', 'ocr_vqa', 'textvqa', 'VG_100K', 'VG_100K_2']:
            self.all_folders[media_type] = MediaResource.download(media_type)
        return super().prepare_dataset(dataset)

    def preprocess(self, row: Dict[str, Any]) -> Optional[Dict[str, Any]]:
        if not row['images']:
            return
        row = super().preprocess(row)
        images = [p['path'] for p in row['images']]
        new_images = []
        for image in images:
            # Map each dataset-relative prefix onto its downloaded folder.
            # Note: ocr_vqa keeps its original relative path when joining.
            if 'coco/' in image:
                image = os.path.join(self.all_folders['coco'], image.replace('coco/', ''))
            elif 'gqa/' in image:
                image = os.path.join(self.all_folders['gqa'], image.replace('gqa/', ''))
            elif 'ocr_vqa/' in image:
                image = os.path.join(self.all_folders['ocr_vqa'], image)
            elif 'textvqa/' in image:
                image = os.path.join(self.all_folders['textvqa'], image.replace('textvqa/', ''))
            elif 'VG_100K/' in image:
                image = os.path.join(self.all_folders['VG_100K'], image.replace('vg/', ''))
            elif 'VG_100K_2/' in image:
                image = os.path.join(self.all_folders['VG_100K_2'], image.replace('vg/', ''))
            new_images.append(image)
        if all(os.path.exists(image) for image in new_images):
            row['images'] = new_images
        else:
            # NOTE(review): returns a row with images=None instead of dropping
            # it outright — presumably filtered downstream; confirm.
            return {'images': None}
        return row
257
+
258
+
259
# TIGER-Lab llava-data: SFT-quality multimodal instructions.
register_dataset(
    DatasetMeta(
        ms_dataset_id='swift/llava-data',
        hf_dataset_id='TIGER-Lab/llava-data',
        subsets=['llava_instruct'],
        preprocess_func=LLaVADataPreprocessor(),
        tags=['sft', 'multi-modal', 'quality'],
    ))
267
+
268
+
269
class PixelProsePreprocessor(RowPreprocessor):
    """PixelProse rows: strip the boilerplate caption prefix and pair the
    caption with a randomly chosen describe-the-image prompt."""

    def preprocess(self, row: Dict[str, Any]) -> Dict[str, Any]:
        caption_prompt = [
            'Give the description of this image.', 'Describe this picture', 'What is the proper title of this image?'
        ]
        prefix = 'This image displays:'
        vlm_caption = row['vlm_caption']
        if vlm_caption.startswith(prefix):
            vlm_caption = vlm_caption[len(prefix):].strip()
        user_msg = {'role': 'user', 'content': np.random.choice(caption_prompt)}
        assistant_msg = {'role': 'assistant', 'content': vlm_caption}
        return {'messages': [user_msg, assistant_msg], 'images': row['url']}
289
+
290
+
291
# PixelProse: large caption dataset with several source splits.
register_dataset(
    DatasetMeta(
        ms_dataset_id='swift/pixelprose',
        hf_dataset_id='tomg-group-umd/pixelprose',
        preprocess_func=PixelProsePreprocessor(),
        split=['train', 'cc12m', 'commonpool', 'redcaps'],
        tags=['caption', 'multi-modal', 'vision'],
        huge_dataset=True,
    ))
300
+
301
+
302
class AIShell1Preprocessor(ResponsePreprocessor):
    """AISHELL-1 ASR rows: fixed speech-to-text prompt; spaces are removed
    from the transcription label."""

    def preprocess(self, row: Dict[str, Any]) -> Dict[str, Any]:
        row['query'] = '语音转文本'
        row['response'] = row['Text:LABEL'].replace(' ', '')
        return super().preprocess(row)
308
+
309
+
310
# AISHELL-1 ASR dataset with train/validation/test subsets.
register_dataset(
    DatasetMeta(
        ms_dataset_id='speech_asr/speech_asr_aishell1_trainsets',
        subsets=[
            SubsetDataset('train', split=['train']),
            SubsetDataset('validation', split=['validation']),
            SubsetDataset('test', split=['test']),
        ],
        preprocess_func=AIShell1Preprocessor(columns={'Audio:FILE': 'audios'}),
        tags=['chat', 'multi-modal', 'audio'],
    ))
321
+
322
+
323
class EmoSchemaPreprocessor(ResponsePreprocessor):
    """EgoSchema multiple-choice video QA: downloads the video chunks and maps
    the numeric answer index to an option letter."""

    _OPTION_LETTERS = {'0': 'A', '1': 'B', '2': 'C', '3': 'D', '4': 'E'}

    def prepare_dataset(self, dataset: HfDataset) -> HfDataset:
        for i in range(1, 6):
            url = f'https://modelscope.cn/datasets/AI-ModelScope/egoschema/resolve/master/videos_chunked_0{i}.zip'
            local_dir = MediaResource.download(url, 'egoschema')

        self.local_dir = os.path.join(local_dir, 'videos')
        self.mp4_set = [file[:-4] for file in os.listdir(self.local_dir) if file.endswith('mp4')]
        return super().prepare_dataset(dataset)

    def preprocess(self, row: Dict[str, Any]) -> Optional[Dict[str, Any]]:
        # Skip rows whose video chunk was not downloaded.
        if row['video_idx'] not in self.mp4_set:
            return None
        query = row['query'] + '\n' + '\n'.join(row['option'])
        video_path = os.path.join(self.local_dir, f"{row['video_idx']}.mp4")
        return super().preprocess({
            'query': query,
            'response': self._OPTION_LETTERS[row['response']],
            'videos': [video_path],
        })
350
+
351
+
352
class EmoSchemaClsPreprocessor(EmoSchemaPreprocessor):
    """Classification variant: keeps the numeric answer as an integer label
    instead of mapping it to an option letter."""

    def preprocess(self, row: Dict[str, Any]) -> Optional[Dict[str, Any]]:
        if row['video_idx'] not in self.mp4_set:
            return None
        row = {
            'query': row['query'] + '\n' + '\n'.join(row['option']),
            'label': int(row['response']),
            'videos': [os.path.join(self.local_dir, f"{row['video_idx']}.mp4")],
        }
        # Deliberately bypasses the parent's letter-mapping preprocess.
        return ResponsePreprocessor.preprocess(self, row)
363
+
364
+
365
# EgoSchema: default (option-letter) and cls (integer-label) variants.
register_dataset(
    DatasetMeta(
        ms_dataset_id='AI-ModelScope/egoschema',
        hf_dataset_id='lmms-lab/egoschema',
        subsets=[
            SubsetDataset('default', 'Subset', preprocess_func=EmoSchemaPreprocessor()),
            SubsetDataset('cls', 'Subset', preprocess_func=EmoSchemaClsPreprocessor())
        ],
        split=['test'],
        tags=['chat', 'multi-modal', 'video'],
    ))
376
+
377
+
378
+ def _generate_url_list(_url, _range):
379
+ lst = []
380
+ for i in range(1, (_range + 1)):
381
+ lst.append(_url.replace('{}', str(i)))
382
+ return lst
383
+
384
+
385
class LLaVAVideo178KPreprocessor(MessagesPreprocessor):
    """LLaVA-Video-178K preprocessor: downloads the sharded video archives for
    one subset and resolves each row's video path against the local copy."""

    # Every subset follows the same URL pattern; only the shard count differs.
    video_resources = {
        name: _generate_url_list(
            'https://www.modelscope.cn/datasets/lmms-lab/LLaVA-Video-178K/resolve/master/'
            + name + '/' + name + '_videos_{}.tar.gz',
            count,
        )
        for name, count in [
            ('0_30_s_academic_v0_1', 8),
            ('0_30_s_youtube_v0_1', 19),
            ('1_2_m_academic_v0_1', 14),
            ('1_2_m_youtube_v0_1', 50),
            ('2_3_m_academic_v0_1', 18),
            ('2_3_m_youtube_v0_1', 98),
            ('30_60_s_academic_v0_1', 10),
            ('30_60_s_youtube_v0_1', 13),
        ]
    }

    def __init__(self, *, subset: str, columns: Optional[Dict[str, str]] = None) -> None:
        self.subset = subset
        super().__init__(columns=columns)

    def prepare_dataset(self, dataset: HfDataset) -> HfDataset:
        urls = self.video_resources[self.subset]
        self.local_dir = MediaResource.download(urls, f'llava_video_178k_{self.subset}', file_type='sharded')
        return super().prepare_dataset(dataset)

    def preprocess(self, row: Dict[str, Any]) -> Optional[Dict[str, Any]]:
        file_path = os.path.join(self.local_dir, f"{row['videos']}")
        # Drop rows whose video shard is not present locally.
        if not os.path.exists(file_path):
            return None
        return super().preprocess({'messages': row['messages'], 'videos': file_path})
452
+
453
+
454
# Build one SubsetDataset per duration/source bucket of LLaVA-Video-178K.
llava_video_subsets = []
for subset in [
        '0_30_s_academic_v0_1',
        '0_30_s_youtube_v0_1',
        '1_2_m_academic_v0_1',
        '1_2_m_youtube_v0_1',
        '2_3_m_academic_v0_1',
        '2_3_m_youtube_v0_1',
        '30_60_s_academic_v0_1',
        '30_60_s_youtube_v0_1',
]:
    subset = SubsetDataset(
        subset=subset,
        split=['caption', 'open_ended', 'multi_choice'],
        preprocess_func=LLaVAVideo178KPreprocessor(subset=subset),
    )
    llava_video_subsets.append(subset)

register_dataset(
    DatasetMeta(
        hf_dataset_id='lmms-lab/LLaVA-Video-178K', subsets=llava_video_subsets, tags=['chat', 'multi-modal', 'video']))
475
+
476
+
477
class MovieChat1KPreprocessor(ResponsePreprocessor):
    """MovieChat-1K test preprocessor: downloads the known video files one by
    one and answers the first 'global' question of each video."""

    def prepare_dataset(self, dataset: HfDataset) -> HfDataset:
        # (filename prefix, index range) pairs describing every available mp4.
        file_specs = [
            ('', range(1, 10)),
            ('', range(201, 240)),
            ('AWA-', range(1, 10)),
            ('AWB-', range(1, 16)),
            ('AWC-', range(1, 11)),
            ('AWD-', range(1, 8)),
            ('AWE-', range(1, 7)),
            ('AWG-', range(1, 12)),
            ('AWH-', range(1, 8)),
            ('BWA-', range(1, 7)),
            ('BWB-', range(1, 7)),
            ('BWD-', range(1, 6)),
            ('BWE-', range(1, 6)),
            ('BWG-', range(1, 6)),
            ('BWH-', range(1, 6)),
            ('TFS-', range(1, 13)),
            ('UWA-', range(1, 5)),
        ]
        mp4_set = [f'{prefix}{i}.mp4' for prefix, rng in file_specs for i in rng]
        mp4_set.append('UWA-6.mp4')
        for file in mp4_set:
            url = f'https://modelscope.cn/datasets/AI-ModelScope/MovieChat-1K-test/resolve/master/videos/{file}'
            self.local_dir = MediaResource.download(url, 'moviechat_1k_test', file_type='file')
        return super().prepare_dataset(dataset)

    def preprocess(self, row: Dict[str, Any]) -> Optional[Dict[str, Any]]:
        file_path = os.path.join(self.local_dir, f"{row['info']['video_path']}")
        if not os.path.exists(file_path):
            return None
        return super().preprocess({
            'query': row['global'][0]['question'],
            'response': row['global'][0]['answer'],
            'videos': file_path,
        })
511
+
512
+
513
# MovieChat-1K test split: long-video QA.
register_dataset(
    DatasetMeta(
        ms_dataset_id='AI-ModelScope/MovieChat-1K-test',
        hf_dataset_id='Enxin/MovieChat-1K-test',
        preprocess_func=MovieChat1KPreprocessor(),
        split=['train'],
        tags=['chat', 'multi-modal', 'video']))
520
+
521
+
522
class VideoChatGPTPreprocessor(ResponsePreprocessor):
    """VideoChatGPT preprocessor.

    Downloads the test videos once, then pairs each row with its local video
    file. The query is taken from the last non-empty of the 'query',
    'question_1', 'question_2' columns (matching the original precedence).
    """

    def prepare_dataset(self, dataset: HfDataset) -> HfDataset:
        url = 'https://modelscope.cn/datasets/swift/VideoChatGPT/resolve/master/videos.zip'
        local_dir = MediaResource.download(url, 'video_chatgpt')
        self.local_dir = os.path.join(local_dir, 'Test_Videos')
        # Fix: the directory was previously scanned with os.listdir on every
        # row; scan it once here instead. Only `.mp4` files are considered.
        self.mp4_set = {file[:-4] for file in os.listdir(self.local_dir) if file.endswith('mp4')}
        return super().prepare_dataset(dataset)

    def preprocess(self, row: Dict[str, Any]) -> Optional[Dict[str, Any]]:
        # Drop rows whose video is not present locally.
        if row['video_name'] not in self.mp4_set:
            return None
        row['videos'] = os.path.join(self.local_dir, f"{row['video_name']}.mp4")
        for key in ['query', 'question_1', 'question_2']:
            query = row.get(key)
            if query is None or query == 'None':
                continue
            row['query'] = query
        return super().preprocess(row)
542
+
543
+
544
# VideoChatGPT benchmark subsets (test split only).
register_dataset(
    DatasetMeta(
        ms_dataset_id='swift/VideoChatGPT',
        hf_dataset_id='lmms-lab/VideoChatGPT',
        subsets=['Generic', 'Temporal', 'Consistency'],
        preprocess_func=VideoChatGPTPreprocessor(),
        split=['test'],
        tags=['chat', 'multi-modal', 'video', '🔥'],
    ))
553
+
554
+
555
def preprocess_mind2web(dataset, **kwargs):
    """Convert Multimodal-Mind2Web rows into multi-turn agent conversations.

    Consecutive rows belonging to one task (a new task is detected by
    ``target_action_index == '0'``) are folded into one conversation with one
    screenshot per turn and a fixed CLICK/TYPE/SELECT tool list. Supports both
    map-style and iterable (streaming) datasets.
    """

    def preprocess_row(row: Dict[str, Any]) -> Dict[str, Any]:
        # Build query/response for one action step; the screenshot is saved to
        # a local file and its path kept on the row.
        raw_html = row['cleaned_html']
        screenshot = row['screenshot']
        row['screenshot'] = MediaResource.safe_save(screenshot, row['action_uid'] + '.jpg', 'mind2web')
        action = row['target_action_reprs']
        actions = action.split('->')
        row['query'] = f'The snapshot of screen:<image>\nThe html source code:{raw_html}\n'
        action = actions[-1]
        where = actions[0] if len(actions) > 1 else ''
        what = ''
        if ':' in action:
            action, what = action[:action.find(':')], action[action.find(':') + 1:]
        # NOTE(review): the f-string always appends ',' + what even when
        # `what` is empty, producing 'Action Input: <where>,' — confirm this
        # trailing comma matches the intended target format.
        row['response'] = f'Action: {action.strip()}\nAction Input: {where.strip()}{"," + what.strip()}'
        return row

    conversations = []
    # Tool schema attached to every conversation: CLICK / TYPE / SELECT.
    tools = [{
        'function': {
            'name': 'CLICK',
            'desc': 'Choose and click an element in the web page',
            'parameter': [{
                'element': 'string, the element in the web page to click'
            }]
        }
    }, {
        'function': {
            'name':
            'TYPE',
            'desc':
            'Input some text into a web element like <input> or <textbox>',
            'parameter': [{
                'element': 'string, the element in the web page to input to',
                'content': 'string, what content to input into the textbox element'
            }]
        }
    }, {
        'function': {
            'name':
            'SELECT',
            'desc':
            'Select an element from a combobox',
            'parameter': [{
                'element': 'string, the combobox or dropdown in the web page on which the select happens',
                'content': 'string, which choices to choose'
            }]
        }
    }]

    def history_to_messages(history):
        # Flatten [query, response] pairs into role-tagged chat messages.
        messages = []
        for h in history:
            messages.append({'role': 'user', 'content': h[0]})
            messages.append({'role': 'assistant', 'content': h[1]})
        return messages

    if isinstance(dataset, HfIterableDataset):

        def generate_example(dataset):
            # Streaming variant: yield one conversation per completed task.
            history = []
            images = []
            for row in dataset:
                target_action_index = row['target_action_index']
                row = preprocess_row(row)
                query = row['query']
                if target_action_index == '0':
                    # A new task begins: flush the accumulated conversation.
                    if history:
                        yield {'messages': history_to_messages(history), 'images': images, 'tools': tools}
                        images = []
                        history = []
                    # The first turn of a task carries the task description.
                    query = query + '\n' + row['confirmed_task']
                history.append([query, row['response']])
                images.append(row['screenshot'])

            if history:
                yield {'messages': history_to_messages(history), 'images': images, 'tools': tools}

        return HfIterableDataset.from_generator(generate_example, gen_kwargs={'dataset': dataset})

    # Map-style variant: same folding, accumulated into a list.
    history = []
    images = []
    for row in tqdm(dataset):
        target_action_index = row['target_action_index']
        row = preprocess_row(row)
        query = row['query']
        if target_action_index == '0':
            if history:
                conversations.append({'messages': history_to_messages(history), 'images': images, 'tools': tools})
                images = []
                history = []
            query = query + '\n' + row['confirmed_task']
        history.append([query, row['response']])
        images.append(row['screenshot'])

    if history:
        conversations.append({'messages': history_to_messages(history), 'images': images, 'tools': tools})

    return HfDataset.from_list(conversations)
654
+
655
+
656
# Multimodal-Mind2Web agent data and the M3IT multi-task collection.
register_dataset(
    DatasetMeta(
        ms_dataset_id='swift/Multimodal-Mind2Web',
        hf_dataset_id='osunlp/Multimodal-Mind2Web',
        preprocess_func=preprocess_mind2web,
        tags=['agent', 'multi-modal']))

register_dataset(
    DatasetMeta(
        ms_dataset_id='AI-ModelScope/M3IT',
        subsets=[
            'coco', 'vqa-v2', 'shapes', 'shapes-rephrased', 'coco-goi-rephrased', 'snli-ve', 'snli-ve-rephrased',
            'okvqa', 'a-okvqa', 'viquae', 'textcap', 'docvqa', 'science-qa', 'imagenet', 'imagenet-open-ended',
            'imagenet-rephrased', 'coco-goi', 'clevr', 'clevr-rephrased', 'nlvr', 'coco-itm', 'coco-itm-rephrased',
            'vsr', 'vsr-rephrased', 'mocheg', 'mocheg-rephrased', 'coco-text', 'fm-iqa', 'activitynet-qa', 'msrvtt',
            'ss', 'coco-cn', 'refcoco', 'refcoco-rephrased', 'multi30k', 'image-paragraph-captioning', 'visual-dialog',
            'visual-dialog-rephrased', 'iqa', 'vcr', 'visual-mrc', 'ivqa', 'msrvtt-qa', 'msvd-qa', 'gqa', 'text-vqa',
            'ocr-vqa', 'st-vqa', 'flickr8k-cn'
        ],
        preprocess_func=ResponsePreprocessor(columns={
            'instruction': 'system',
            'inputs': 'query',
            'image_base64_str': 'images',
            'outputs': 'response'
        }),
        split=['train'],
        huge_dataset=True,
        tags=['chat', 'multi-modal', 'vision']))
684
+
685
+
686
class ShareGPT4VPreprocessor(MessagesPreprocessor):
    """ShareGPT4V preprocessor: downloads the media collections required by
    the selected subset(s) and resolves each row's image to a local path."""

    def prepare_dataset(self, dataset):
        # When no config is selected, both subsets' media must be available.
        split = ['ShareGPT4V', 'ShareGPT4V-PT'] if dataset.config_name is None else dataset.config_name
        IMAGE_DATASET_REQUIREMENTS = {
            'ShareGPT4V': ['coco', 'sam', 'llava', 'wikiart', 'share_textvqa', 'web-celebrity', 'web-landmark'],
            'ShareGPT4V-PT': ['coco', 'sam', 'llava']
        }

        if isinstance(split, str):
            split = [split]
        self.all_folders = {}
        for sp in split:
            for media_type in IMAGE_DATASET_REQUIREMENTS[sp]:
                self.all_folders[media_type] = MediaResource.download(media_type)
        return super().prepare_dataset(dataset)

    def preprocess(self, row: Dict[str, Any]) -> Optional[Dict[str, Any]]:
        image = row['image']
        row.update(super().preprocess(row))
        # Rewrite the dataset-relative image path onto the downloaded folder;
        # each source uses a different on-disk layout.
        if 'coco/' in image:
            image = os.path.join(self.all_folders['coco'], image.replace('coco/', ''))
        elif 'sam/' in image:
            image = os.path.join(self.all_folders['sam'], image.replace('sam/images/', ''))
        elif 'llava/' in image:
            image = os.path.join(self.all_folders['llava'], image.replace('llava/llava_pretrain/images/', ''))
        elif 'wikiart/' in image:
            image = os.path.join(self.all_folders['wikiart'], image.replace('wikiart/images/', 'data/wikiart/images/'))
        elif 'share_textvqa/' in image:
            image = os.path.join(self.all_folders['share_textvqa'],
                                 image.replace('share_textvqa/images/', 'data/share_textvqa/images/'))
        elif 'web-celebrity/' in image:
            image = os.path.join(self.all_folders['web-celebrity'],
                                 image.replace('web-celebrity/images/', 'data/web-celebrity/images/'))
        elif 'web-landmark/' in image:
            image = os.path.join(self.all_folders['web-landmark'],
                                 image.replace('web-landmark/images/', 'data/web-landmark/images/'))
        # Rows whose image is missing locally are dropped.
        if os.path.exists(image):
            row['images'] = image
        else:
            return
        return row
728
+
729
+
730
# ShareGPT4V: high-quality caption data (PT = pretraining variant).
register_dataset(
    DatasetMeta(
        ms_dataset_id='AI-ModelScope/ShareGPT4V',
        subsets=['ShareGPT4V', 'ShareGPT4V-PT'],
        preprocess_func=ShareGPT4VPreprocessor(),
        huge_dataset=True,
        tags=['chat', 'multi-modal', 'vision']))
737
+
738
+
739
class TextCapsPreprocessor(ResponsePreprocessor):
    """TextCaps rows with a fixed caption query; rows whose local image file
    is missing are dropped."""

    def preprocess(self, row: Dict[str, Any]) -> Optional[Dict[str, Any]]:
        row['query'] = 'What is the caption of this image?'
        if not os.path.exists(row['images']['path']):
            return None
        return super().preprocess(row)
746
+
747
+
748
class TextCapsEmbPreprocessor(ResponsePreprocessor):
    """TextCaps variant with an empty query — presumably for embedding-style
    training (registered as the 'emb' subset); confirm against usage."""

    def preprocess(self, row: Dict[str, Any]) -> Optional[Dict[str, Any]]:
        row['query'] = ''
        if not os.path.exists(row['images']['path']):
            return None
        return super().preprocess(row)
755
+
756
+
757
# TextCaps: default caption subset plus the empty-query 'emb' subset.
register_dataset(
    DatasetMeta(
        ms_dataset_id='swift/TextCaps',
        hf_dataset_id='HuggingFaceM4/TextCaps',
        subsets=[
            SubsetDataset(
                name='default',
                preprocess_func=TextCapsPreprocessor(columns={'reference_strs': 'response'}),
                split=['train', 'validation'],
            ),
            SubsetDataset(
                name='emb',
                preprocess_func=TextCapsEmbPreprocessor(columns={'reference_strs': 'response'}),
                split=['train', 'validation'],
            ),
        ],
        huge_dataset=True,
        tags=['multi-modal', 'en', 'caption', 'quality']))
775
+
776
+
777
class RefCOCOPreprocessor(ResponsePreprocessor, GroundingMixin):
    """RefCOCO/RefCOCOg preprocessor: emits caption or grounding samples
    (selected by ``task_type``) with integer-rounded bounding boxes."""
    task_type = 'caption'

    def __init__(self, task_type, **kwargs):
        self.task_type = task_type
        super().__init__(**kwargs)

    def prepare_dataset(self, dataset):
        self.cache_dir = MediaResource.download(
            'https://www.modelscope.cn/api/v1/datasets/we_dont_produce_water/'
            'coco_res/repo?Revision=master&FilePath=coco_2014.zip', 'coco2014')
        return dataset

    def preprocess(self, row: Dict[str, Any]) -> Optional[Dict[str, Any]]:
        image_path = os.path.join(self.cache_dir, row['image_path'].replace('coco/train2014', 'train2014'))
        if not os.path.exists(image_path):
            return None

        bbox = row['bbox']
        # Round the box coordinates to integers in place.
        for idx, coord in enumerate(bbox):
            bbox[idx] = round(float(coord))

        query, response = self.construct_grounding_prompt()
        res = {
            'query': query,
            'response': response,
            'images': [image_path],
            'objects': {
                'ref': [row['captions'][0]],
                'bbox': [bbox],
            },
        }
        return super().preprocess(res)
809
+
810
+
811
# RefCOCO / RefCOCOg grounding datasets and the lnqa OCR-VQA dataset.
register_dataset(
    DatasetMeta(
        ms_dataset_id='swift/refcoco',
        hf_dataset_id='jxu124/refcoco',
        subsets=[
            SubsetDataset(
                name='caption',
                preprocess_func=RefCOCOPreprocessor('caption'),
            ),
            SubsetDataset(
                name='grounding',
                preprocess_func=RefCOCOPreprocessor('grounding'),
            )
        ],
        split=['train', 'validation'],
        tags=['multi-modal', 'en', 'grounding']))

register_dataset(
    DatasetMeta(
        ms_dataset_id='swift/refcocog',
        hf_dataset_id='jxu124/refcocog',
        subsets=[
            SubsetDataset(
                name='caption',
                preprocess_func=RefCOCOPreprocessor('caption'),
            ),
            SubsetDataset(
                name='grounding',
                preprocess_func=RefCOCOPreprocessor('grounding'),
            )
        ],
        split=['train', 'validation'],
        tags=['multi-modal', 'en', 'grounding']))

register_dataset(
    DatasetMeta(
        ms_dataset_id='swift/lnqa',
        hf_dataset_id='vikhyatk/lnqa',
        preprocess_func=MessagesPreprocessor(user_role='question', assistant_role='answer'),
        split=['train', 'validation'],
        huge_dataset=True,
        tags=['multi-modal', 'en', 'ocr-vqa', 'quality']))
853
+
854
+
855
class LLaVAInstructPreprocessor(MessagesPreprocessor):
    """Resolve LLaVA-Instruct image references to local files before message conversion."""

    def prepare_dataset(self, dataset):
        # Download every image source referenced by the dataset once, up front.
        media_types = ['coco', 'gqa', 'ocr_vqa', 'textvqa', 'VG_100K', 'VG_100K_2']
        self.all_folders = {media_type: MediaResource.download(media_type) for media_type in media_types}
        return super().prepare_dataset(dataset)

    def preprocess(self, row: Dict[str, Any]) -> Optional[Dict[str, Any]]:
        """Rewrite the row's image reference to a local path; drop rows whose file is missing."""
        # (marker substring, downloaded folder key, prefix stripped from the stored path).
        # A None prefix keeps the stored path unchanged (the ocr_vqa cache keeps
        # the 'ocr_vqa/' folder inside it).
        routes = [
            ('coco/', 'coco', 'coco/'),
            ('gqa/', 'gqa', 'gqa/'),
            ('ocr_vqa/', 'ocr_vqa', None),
            ('textvqa/', 'textvqa', 'textvqa/'),
            ('VG_100K/', 'VG_100K', 'vg/'),
            ('VG_100K_2/', 'VG_100K_2', 'vg/'),
        ]
        image = row['images']
        for marker, folder, prefix in routes:
            if marker in image:
                relative = image if prefix is None else image.replace(prefix, '')
                image = os.path.join(self.all_folders[folder], relative)
                break
        if not os.path.exists(image):
            return
        row['images'] = image
        return super().preprocess(row)
883
+
884
+
885
# LLaVA-Instruct-150K, pinned to a fixed ModelScope revision.
register_dataset(
    DatasetMeta(
        ms_dataset_id='AI-ModelScope/LLaVA-Instruct-150K',
        ms_revision='d5db3806e395c60496630a206c336932e85a2d00',
        preprocess_func=LLaVAInstructPreprocessor(),
        split=['train'],
        tags=['chat', 'multi-modal', 'vision']))
892
+
893
+
894
class LLaVAPretrainPreprocessor(MessagesPreprocessor):
    """Preprocess LLaVA-Pretrain rows: convert messages and resolve the local image path.

    Rows are dropped (None returned) when the parent preprocessor rejects the
    conversation, when no image is referenced, or when the image file is
    missing from the downloaded archive.
    """

    def prepare_dataset(self, dataset):
        # Download and unpack the image archive once before row-level preprocessing.
        self.media_dir = MediaResource.download(
            ('https://www.modelscope.cn/api/v1/datasets/AI-ModelScope/LLaVA-Pretrain/repo?'
             'Revision=master&FilePath=images.zip'),  # noqa
            'llava_pretrain')
        return super().prepare_dataset(dataset)

    def preprocess(self, row: Dict[str, Any]) -> Optional[Dict[str, Any]]:
        processed = super().preprocess(row)
        if processed is None:
            # The parent preprocessor rejected the row (e.g. unparsable messages).
            # The original `row.update(super().preprocess(row))` raised TypeError here.
            return
        row.update(processed)
        if not row['image']:
            return
        file_path = os.path.join(self.media_dir, row['image'])
        if not os.path.exists(file_path):
            return
        # Keep the converted messages and attach the resolved image path.
        # The previous implementation returned only {'images': file_path},
        # silently discarding the conversation.
        row['images'] = file_path
        return row
914
+
915
+
916
# LLaVA-Pretrain, pinned to a fixed ModelScope revision.
register_dataset(
    DatasetMeta(
        ms_dataset_id='AI-ModelScope/LLaVA-Pretrain',
        ms_revision='e3a3f0bfaad05e90e46745152a32bf944e0f4a63',
        hf_dataset_id='liuhaotian/LLaVA-Pretrain',
        preprocess_func=LLaVAPretrainPreprocessor(),
        huge_dataset=True,
        tags=['chat', 'multi-modal', 'quality']))

# Midefics medical VQA: QA pairs nested under the 'data' key with
# 'question'/'answer' role names.
register_dataset(
    DatasetMeta(
        ms_dataset_id='swift/MideficsDataset',
        hf_dataset_id='WinterSchool/MideficsDataset',
        preprocess_func=MessagesPreprocessor(inner_key='data', user_role='question', assistant_role='answer'),
        tags=['medical', 'en', 'vqa']))

register_dataset(
    DatasetMeta(
        ms_dataset_id='swift/OK-VQA_train',
        hf_dataset_id='Multimodal-Fatima/OK-VQA_train',
        preprocess_func=ResponsePreprocessor(),
        tags=['multi-modal', 'en', 'vqa', 'quality']))

# A-OKVQA: the 'rationales' column is used as the response.
register_dataset(
    DatasetMeta(
        ms_dataset_id='swift/A-OKVQA',
        hf_dataset_id='HuggingFaceM4/A-OKVQA',
        split=['train', 'validation'],
        preprocess_func=ResponsePreprocessor(columns={'rationales': 'response'}),
        tags=['multi-modal', 'en', 'vqa', 'quality']))
946
+
947
+
948
class OcrvqaPreprocessor(RowPreprocessor):
    """Pick one question/answer pair per OCR-VQA row and emit it as a chat sample."""

    def preprocess(self, row: Dict[str, Any]) -> Dict[str, Any]:
        # Use the preprocessor's seeded RandomState (set in RowPreprocessor.__init__)
        # rather than the global numpy RNG, so sampling is reproducible and
        # consistent with ResponsePreprocessor's response sampling.
        idx = self.random_state.choice(range(len(row['questions'])))
        query = row['questions'][idx]
        response = row['answers'][idx]
        return {
            'messages': [{
                'role': 'user',
                'content': query
            }, {
                'role': 'assistant',
                'content': response
            }],
        }
963
+
964
+
965
# OCR-VQA: one randomly chosen QA pair per image (see OcrvqaPreprocessor).
register_dataset(
    DatasetMeta(
        ms_dataset_id='swift/OCR-VQA',
        hf_dataset_id='howard-hou/OCR-VQA',
        split=['train', 'validation'],
        preprocess_func=OcrvqaPreprocessor(),
        tags=['multi-modal', 'en', 'ocr-vqa']))
972
+
973
+
974
class ScienceQAPreprocessor(RowPreprocessor):
    """Turn a ScienceQA row into a chat sample whose answer states the rationale first."""

    def preprocess(self, row: Dict[str, Any]) -> Dict[str, Any]:
        # 'answer' is an index into the 'choices' list.
        answer_text = row['choices'][row['answer']]
        full_response = f"{row['solution']}\nSo the final answer is: {answer_text}"
        return {
            'messages': [
                {'role': 'user', 'content': row['question']},
                {'role': 'assistant', 'content': full_response},
            ]
        }
982
+
983
+
984
register_dataset(
    DatasetMeta(
        ms_dataset_id='swift/ScienceQA',
        hf_dataset_id='derek-thomas/ScienceQA',
        split=['train', 'validation'],
        preprocess_func=ScienceQAPreprocessor(),
        tags=['multi-modal', 'science', 'vqa', 'quality']))
991
+
992
+
993
class GritPreprocessor(RowPreprocessor, GroundingMixin):
    """Convert GRIT rows (caption + referring expressions) into grounding/caption/vqa samples.

    Rows with overlapping referring-expression character spans, or with no
    referring expressions at all, are dropped.
    """

    def __init__(self, task_type, **kwargs):
        # task_type: 'caption', 'grounding', or 'vqa' — selects the prompt style below.
        self.task_type = task_type
        super().__init__(**kwargs)

    @staticmethod
    def has_overlap(start_ends):
        """Return True if any adjacent (start, end) pairs overlap.

        Assumes `start_ends` is already sorted by start (see preprocess).
        """
        for i in range(1, len(start_ends)):
            if start_ends[i][0] < start_ends[i - 1][1]:
                return True
        return False

    @staticmethod
    def replace_intervals_with_tags(response, start_ends):
        """Replace each [start, end) character interval with '<ref-object><bbox>'.

        NOTE(review): not called anywhere in this class — presumably used by an
        external caller or kept for a caption variant; verify before removing.
        """
        result = []
        last_end = 0
        for start, end in start_ends:
            result.append(response[int(last_end):int(start)])
            result.append('<ref-object><bbox>')
            last_end = end
        result.append(response[int(last_end):])
        return ''.join(result)

    def preprocess(self, row: Dict[str, Any]) -> Optional[Dict[str, Any]]:
        images = row['images']
        caption = row['caption']
        # Each ref_exp: [start, end, x1, y1, x2, y2, confidence] — spans index
        # into `caption`, boxes are normalized to [0, 1] ('norm1').
        ref_exps = row['ref_exps']
        objects = {'ref': [], 'bbox': [], 'bbox_type': 'norm1'}
        start_end_pairs = []
        for ref_exp in ref_exps:
            start = ref_exp[0]
            end = ref_exp[1]
            # conf = ref_exp[6] TODO filter low confidence rows?
            start_end_pairs.append(ref_exp[0:2])

            object_part = caption[int(start):int(end)]
            objects['ref'].append(object_part)
            objects['bbox'].append(ref_exp[2:6])

        # Sort spans so the pairwise overlap check below is valid.
        start_end_pairs.sort(key=lambda x: (x[0], x[1]))
        if self.has_overlap(start_end_pairs) or not ref_exps:
            return

        if self.task_type in ('grounding', 'caption'):
            query, response = self.construct_grounding_prompt()
        else:
            # 'vqa': plain captioning question with the raw caption as answer.
            query = 'what is the proper caption of this image?'
            response = caption
        return {
            'messages': [{
                'role': 'user',
                'content': query
            }, {
                'role': 'assistant',
                'content': response
            }],
            'images': images,
            'objects': objects
        }
1053
+
1054
+
1055
# GRIT: three subsets sharing one preprocessor; the image is referenced by URL
# (the 'url' column is mapped to 'images').
register_dataset(
    DatasetMeta(
        ms_dataset_id='swift/GRIT',
        hf_dataset_id='zzliang/GRIT',
        subsets=[
            SubsetDataset(
                name='caption',
                preprocess_func=GritPreprocessor('caption', columns={'url': 'images'}),
            ),
            SubsetDataset(
                name='grounding',
                preprocess_func=GritPreprocessor('grounding', columns={'url': 'images'}),
            ),
            SubsetDataset(
                name='vqa',
                preprocess_func=GritPreprocessor('vqa', columns={'url': 'images'}),
            )
        ],
        huge_dataset=True,
        tags=['multi-modal', 'en', 'caption-grounding', 'vqa', 'quality']))
1075
+
1076
+
1077
class GQAPreprocessor(RowPreprocessor):
    """Convert GQA rows into chat samples with a local image path.

    Rows whose image file is missing from the downloaded cache are dropped.
    """

    def prepare_dataset(self, dataset):
        # Download the GQA image archive once before row-level preprocessing.
        self.local_cache = MediaResource.download('gqa')
        return super().prepare_dataset(dataset)

    def preprocess(self, row: Dict[str, Any]) -> Optional[Dict[str, Any]]:
        image_path = os.path.join(self.local_cache, 'images', row['imageId'] + '.jpg')
        # Bug fix: the original tested the truthiness of os.path.join(...),
        # which is always a non-empty string, so missing images were never
        # filtered out. Check file existence instead.
        if not os.path.exists(image_path):
            return
        return {
            'messages': [{
                'role': 'user',
                'content': row['question']
            }, {
                'role': 'assistant',
                'content': row['fullAnswer']
            }],
            'images': image_path,
        }
1098
+
1099
+
1100
# GQA (HF only, no ModelScope mirror registered).
register_dataset(
    DatasetMeta(
        hf_dataset_id='lmms-lab/GQA',
        split=['train_all_instructions'],
        preprocess_func=GQAPreprocessor(),
        huge_dataset=True,
        tags=['multi-modal', 'en', 'vqa', 'quality']))
1107
+
1108
+
1109
class CocoPreprocessor(ResponsePreprocessor):
    """Turn COCO detection rows into object-detection chat samples.

    The numeric category ids in `objects['category']` are resolved to the 80
    COCO class names below; the response holds one '<ref-object><bbox>' line
    per object.
    """

    # The 80 COCO object classes, indexed by category id.
    category = [
        'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat', 'traffic light',
        'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow',
        'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', 'skis',
        'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard',
        'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich',
        'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch', 'potted plant', 'bed',
        'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave', 'oven',
        'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush'
    ]

    def preprocess(self, row: Dict[str, Any]) -> Optional[Dict[str, Any]]:
        objects = row['objects']
        names = [self.category[category_id] for category_id in objects['category']]
        objects['ref'] = names
        row['query'] = 'Task: Object Detection'
        row['response'] = '\n'.join(['<ref-object><bbox>'] * len(names))
        return super().preprocess(row)
1127
+
1128
+
1129
register_dataset(
    DatasetMeta(
        ms_dataset_id='AI-ModelScope/coco',
        hf_dataset_id='detection-datasets/coco',
        preprocess_func=CocoPreprocessor(),
        huge_dataset=True,
        tags=['multi-modal', 'en', 'vqa', 'quality']))
1136
+
1137
+
1138
class LLaVAMixSFTPreprocessor(RowPreprocessor):
    """Flatten structured message content (text/image parts) into plain strings.

    Each image part becomes a literal '<image>' tag inside the message text.
    """

    def preprocess(self, row: Dict[str, Any]) -> Dict[str, Any]:

        def render(parts):
            # Concatenate text fragments; each image placeholder becomes '<image>'.
            pieces = []
            for part in parts:
                if part['type'] == 'text':
                    pieces.append(part['text'])
                elif part['type'] == 'image':
                    pieces.append('<image>')
            return ''.join(pieces)

        flattened = [{'role': msg['role'], 'content': render(msg['content'])} for msg in row['messages']]
        return {'messages': flattened}
1156
+
1157
+
1158
register_dataset(
    DatasetMeta(
        ms_dataset_id='swift/llava-instruct-mix-vsft',
        hf_dataset_id='HuggingFaceH4/llava-instruct-mix-vsft',
        split=['test'],
        preprocess_func=LLaVAMixSFTPreprocessor(),
        tags=['multi-modal', 'en', 'vqa', 'quality']))
1165
+
1166
+
1167
class LatexocrPreprocessor(ResponsePreprocessor):
    """Attach the fixed LaTeX-OCR instruction as the query for every row."""

    def preprocess(self, row: Dict[str, Any]) -> Dict[str, Any]:
        row.update(query='Using LaTeX to perform OCR on the image.')
        return super().preprocess(row)
1172
+
1173
+
1174
register_dataset(
    DatasetMeta(
        ms_dataset_id='AI-ModelScope/LaTeX_OCR',
        hf_dataset_id='linxy/LaTeX_OCR',
        subsets=['default', 'human_handwrite', 'human_handwrite_print', 'synthetic_handwrite', 'small'],
        preprocess_func=LatexocrPreprocessor(),
        split=['train', 'validation', 'test'],
        tags=['chat', 'ocr', 'multi-modal', 'vision'],
    ))
1183
+
1184
+
1185
class CapchaImagesPreprocessor(ResponsePreprocessor):
    """Attach the fixed captcha-recognition instruction as the query for every row."""

    def preprocess(self, row: Dict[str, Any]) -> Dict[str, Any]:
        row.update(query='recognize the content.')
        return super().preprocess(row)
1190
+
1191
+
1192
# captcha-images: the 'solution' column holds the response text.
register_dataset(
    DatasetMeta(
        ms_dataset_id='AI-ModelScope/captcha-images',
        split=['train', 'validation'],
        preprocess_func=CapchaImagesPreprocessor(columns={'solution': 'response'}),
        tags=['chat', 'multi-modal', 'vision']))
1198
+
1199
+
1200
class ClevrPreprocessor(ResponsePreprocessor):
    """Append the GRPO thinking/answer format instruction to every CLEVR query."""

    def preprocess(self, row: Dict[str, Any]) -> Dict[str, Any]:
        suffix = ('Output the thinking process in <think> </think> and '
                  'final answer (number) in <answer> </answer> tags.')
        row['query'] = f"{row.get('query', '')} {suffix}"
        return super().preprocess(row)
1208
+
1209
+
1210
register_dataset(
    DatasetMeta(
        ms_dataset_id='okwinds/clevr_cogen_a_train',
        hf_dataset_id='leonardPKU/clevr_cogen_a_train',
        preprocess_func=ClevrPreprocessor(),
        tags=['qa', 'math', 'vision', 'grpo']))
ms-swift/swift/llm/dataset/preprocessor/__pycache__/extra.cpython-310.pyc ADDED
Binary file (4.23 kB). View file
 
ms-swift/swift/llm/dataset/preprocessor/core.py ADDED
@@ -0,0 +1,529 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) Alibaba, Inc. and its affiliates.
2
+ import ast
3
+ import os
4
+ from collections import Counter
5
+ from contextlib import contextmanager
6
+ from typing import Any, Callable, Dict, List, Optional, Union
7
+
8
+ import numpy as np
9
+ from datasets import Dataset as HfDataset
10
+ from datasets import Image
11
+ from datasets import IterableDataset as HfIterableDataset
12
+ from datasets import Sequence, Value
13
+
14
+ from swift.llm import history_to_messages
15
+ from swift.utils import get_logger, is_dist, is_master, safe_ddp_context
16
+
17
+ DATASET_TYPE = Union[HfDataset, HfIterableDataset]
18
+
19
+ logger = get_logger()
20
+
21
+
22
class RowPreprocessor:
    """Base class for row-level dataset preprocessing.

    Subclasses implement `preprocess` to convert one raw row into the standard
    schema; `__call__` applies it over a (streaming or in-memory) HF dataset in
    batches, dropping rows whose preprocessing returns None or raises.
    """

    # Canonical output columns; all other columns are stripped by `remove_useless_columns`.
    standard_keys = ['messages', 'rejected_response', 'label', 'images', 'videos', 'audios', 'tools', 'objects']

    def __init__(self,
                 *,
                 columns: Optional[Dict[str, str]] = None,
                 dataset_sample: Optional[int] = None,
                 random_state: Union[np.random.RandomState, int, None] = 42,
                 traceback_limit: int = 10) -> None:
        """
        Args:
            columns: Mapping of raw column names to standard column names.
            dataset_sample: If set, sample this many rows before preprocessing.
            random_state: Seed or RandomState used for sampling.
            traceback_limit: Max number of per-row errors whose traceback gets logged.
        """
        self.columns = columns or {}
        self.origin_columns = self.columns.copy()  # Higher priority and raise Error
        # Register singular/plural aliases for the multimodal columns.
        images_keys = ['images', 'image']
        audios_keys = ['audios', 'audio']
        videos_keys = ['videos', 'video']
        for mm_type in ['images', 'audios', 'videos']:
            keys = locals()[f'{mm_type}_keys']
            for key in keys:
                self.columns[key] = mm_type

        self.traceback_limit = traceback_limit
        self._traceback_counter = 0
        self.dataset_sample = dataset_sample
        if not isinstance(random_state, np.random.RandomState):
            random_state = np.random.RandomState(random_state)
        self.random_state = random_state

    @staticmethod
    def _check_messages(row: Dict[str, Any]) -> None:
        """Validate roles/content of `row['messages']` and strip extra message keys."""
        if 'messages' not in row:
            return
        messages = row['messages']
        assert len(messages) > 0, f'messages: {messages}'
        # fix swift/SlimOrca
        for message in messages:
            keys = set(message.keys()) - {'role', 'content'}
            for key in keys:
                message.pop(key)

        for message in messages:
            role, content = message['role'], message['content']
            # The terms 'tool' and 'tool_response' have the same meaning, ensuring compatibility.
            assert role in {'system', 'user', 'tool_call', 'tool_response', 'tool', 'assistant'}, f'message: {message}'
            assert content is not None, f'message: {message}'

    @staticmethod
    def _cast_images(row: Dict[str, Any]) -> None:
        """Normalize `row['images']` to a list of {'bytes': ..., 'path': ...} dicts."""
        images = row.get('images')

        if isinstance(images, str) or isinstance(images, list) and images and isinstance(images[0], str):
            if isinstance(images, str):
                images = [images]
            for i, image in enumerate(images):
                images[i] = {'bytes': None, 'path': image}
            row['images'] = images
        elif isinstance(images, dict):
            row['images'] = [images]

    @staticmethod
    def _check_rejected_response(row: Dict[str, Any]) -> None:
        """Derive `rejected_response` from paired chosen/rejected messages and validate it."""
        if 'rejected_messages' in row:
            chosen_messages = row['messages']
            rejected_messages = row['rejected_messages']
            messages = []
            rejected_response = None
            # Walk user/assistant pairs in lockstep; the user turns must match,
            # and the first diverging assistant turn becomes the rejected response.
            for chosen_user, chosen_assistant, rejected_user, rejected_assistant in zip(
                    chosen_messages[::2], chosen_messages[1::2], rejected_messages[::2], rejected_messages[1::2]):
                assert chosen_user == rejected_user
                messages.append(chosen_user)
                messages.append(chosen_assistant)
                if chosen_assistant != rejected_assistant:
                    rejected_response = rejected_assistant['content']
            row['messages'] = messages
            row['rejected_response'] = rejected_response

        if 'rejected_response' in row:
            messages = row['messages']
            rejected_response = row['rejected_response']
            if rejected_response is None or rejected_response == messages[-1]['content']:
                raise ValueError(f'rejected_response: {rejected_response}')

    def preprocess(self, row: Dict[str, Any]) -> Optional[Dict[str, Any]]:
        """Convert one raw row to the standard schema; return None to drop it."""
        raise NotImplementedError

    def prepare_dataset(self, dataset: DATASET_TYPE) -> DATASET_TYPE:
        """Hook run once before mapping; subclasses use it e.g. to download media."""
        return dataset

    @staticmethod
    def batched_to_rows(batched_row: Dict[str, Any]):
        """Transpose a columnar batch dict into a list of per-row dicts."""
        keys = list(batched_row.keys())
        batch_size = len(batched_row[keys[0]])
        return [{key: batched_row[key][i] for key in keys} for i in range(batch_size)]

    @staticmethod
    def rows_to_batched(rows: List[Dict[str, Any]]):
        """Transpose a list of row dicts back into a columnar batch, padding missing keys with None."""
        batched = {}
        for i, row in enumerate(rows):
            for k, v in row.items():
                if k not in batched:
                    # Key first seen at row i: backfill the earlier rows.
                    batched[k] = [None] * i
                batched[k].append(v)
            # Make all the lengths of v the same.
            for k in set(batched.keys()) - set(row.keys()):
                batched[k].append(None)
        return batched

    @staticmethod
    def _remove_prefix_keys(row, prefix: str):
        """Strip an internal key prefix (e.g. '__@', '__#'), keeping existing keys intact."""
        for k in list(row.keys()):
            if k.startswith(prefix):
                new_k = k[len(prefix):]
                new_v = row.pop(k)
                if new_k not in row:
                    row[new_k] = new_v

    @staticmethod
    def _check_objects(row):
        """Canonicalize `row['objects']` key order and fix inverted bbox corners."""
        objects = row.get('objects')
        if objects is None:
            return
        new_objects = {}
        # Ensure the order
        for k in ['ref', 'bbox', 'bbox_type', 'image_id']:
            if k in objects.keys():
                new_objects[k] = objects[k]
        row['objects'] = new_objects
        # NOTE(review): raises KeyError when 'bbox' is absent from objects — confirm
        # every producer always supplies it.
        bbox = new_objects['bbox']

        # check bbox
        for box in bbox:
            assert len(box) in {2, 4}, f'len(box): {len(box)}'
            if len(box) == 2:
                continue
            # Swap corners so that (x1, y1) <= (x2, y2).
            if box[0] > box[2]:
                box[0], box[2] = box[2], box[0]
            if box[1] > box[3]:
                box[1], box[3] = box[3], box[1]

    def batched_preprocess(self, batched_row: Dict[str, Any], *, strict: bool,
                           ignore_max_length_error: bool) -> Dict[str, Any]:
        """Run `preprocess` over a columnar batch, validating and filtering rows.

        Failed rows are dropped (logged up to `traceback_limit` times) unless
        `strict` is True, in which case the exception propagates.
        """
        from ...template import MaxLengthError
        batched_row = dict(batched_row)
        assert len(batched_row) > 0
        self._remove_prefix_keys(batched_row, '__@')  # compat streaming
        rows = self.batched_to_rows(batched_row)

        new_rows = []
        for row in rows:
            try:
                row = self.preprocess(row)
                # support [row1, row2, ...]
                if row is None:
                    row = []
                if isinstance(row, dict):
                    row = [row]
                for r in row:
                    self._check_objects(r)
                    self._check_messages(r)
                    self._check_rejected_response(r)
                    self._cast_images(r)
            except Exception as e:
                if strict:
                    logger.warning('To avoid errors, you can pass `strict=False`.')
                    raise
                if isinstance(e, MaxLengthError) and ignore_max_length_error:
                    pass
                elif self.traceback_limit is not None and self._traceback_counter < self.traceback_limit:
                    import traceback
                    logger.info(traceback.format_exc())
                    logger.warning('👆👆👆There are errors in the dataset, the data will be deleted')
                    self._traceback_counter += 1
                row = []
            new_rows += row
        res = self.rows_to_batched(new_rows)
        self._remove_prefix_keys(res, '__#')  # compat GRPO
        if len(res) == 0:
            # Whole batch was filtered out: keep a 'messages' column so the
            # resulting dataset schema stays valid.
            res['messages'] = []

        return res

    @staticmethod
    def get_features_dataset(dataset: DATASET_TYPE) -> DATASET_TYPE:
        """Ensure `dataset.features` is resolved (iterable datasets resolve lazily)."""
        if dataset.features is None:
            assert isinstance(dataset, HfIterableDataset)
            dataset = dataset._resolve_features()
        return dataset

    @staticmethod
    def safe_rename_columns(dataset, columns):
        """Case-insensitively rename columns, skipping ambiguous or identity mappings."""
        dataset = RowPreprocessor.get_features_dataset(dataset)
        columns_keys = {k.lower(): k for k in dataset.features.keys()}  # lower -> lower/upper
        safe_columns = {columns_keys[k.lower()]: v for k, v in columns.items() if k.lower() in columns_keys}

        counter = Counter(safe_columns.values())
        for k, new_k in list(safe_columns.items()):
            if counter[new_k] > 1:
                # For example, if "response" and "answer" match, then no processing is done.
                safe_columns.pop(k)
                continue

        # e.g. Keep {'query': 'query'} to ensure that the query has the highest priority.
        safe_columns = {k: v for k, v in safe_columns.items() if k != v}
        if safe_columns:
            dataset = dataset.rename_columns(safe_columns)

        return dataset

    def _rename_columns(self, dataset: DATASET_TYPE) -> DATASET_TYPE:
        """Apply origin columns first (higher priority), then the alias map."""
        dataset = self.safe_rename_columns(dataset, self.origin_columns)
        dataset = self.safe_rename_columns(dataset, self.columns)
        if isinstance(dataset, HfIterableDataset):
            # fix: https://github.com/huggingface/datasets/issues/6408
            columns = {k: f'__@{k}' for k in RowPreprocessor.standard_keys if k in dataset.features}
            if columns:
                dataset = dataset.rename_columns(columns)
        return dataset

    @staticmethod
    def remove_useless_columns(dataset: DATASET_TYPE) -> DATASET_TYPE:
        """Keep only the standard output columns."""
        dataset = RowPreprocessor.get_features_dataset(dataset)
        features = dataset.features
        k_list = [k for k in RowPreprocessor.standard_keys if k in features]
        if len(k_list) != len(features):
            dataset = dataset.select_columns(k_list)
        return dataset

    @staticmethod
    @contextmanager
    def _patch_arrow_writer():
        # fix AI-ModelScope/ms_agent_for_agentfabric:all
        # Force a fixed Arrow schema for messages/images/objects while mapping,
        # then restore the original ArrowWriter constructor.
        from datasets.arrow_writer import ArrowWriter

        def _new_init(self, schema=None, features=None, *args, **kwargs):

            if features is not None:
                features['messages'] = [{'role': Value(dtype='string'), 'content': Value(dtype='string')}]
                features['images'] = [{'bytes': Value(dtype='binary'), 'path': Value(dtype='string')}]
                features['objects'] = {
                    'ref': Sequence(feature=Value(dtype='string'), length=-1),
                    'bbox': Sequence(feature=Sequence(feature=Value(dtype='float64'), length=-1), length=-1)
                }
            ArrowWriter.__origin_init__(self, schema, features, *args, **kwargs)

        ArrowWriter.__origin_init__ = ArrowWriter.__init__
        ArrowWriter.__init__ = _new_init
        try:
            yield
        finally:
            ArrowWriter.__init__ = ArrowWriter.__origin_init__
            del ArrowWriter.__origin_init__

    def _cast_pil_image(self, dataset):
        """Disable PIL decoding of the 'images' column so raw bytes/paths are preserved."""
        features = dataset.features
        if 'images' in features and isinstance(features['images'], Image) and features['images'].decode:
            dataset = dataset.cast_column('images', Image(decode=False))
        return dataset

    def __call__(
        self,
        dataset: DATASET_TYPE,
        *,
        num_proc: int = 1,
        load_from_cache_file: bool = True,
        strict: bool = False,
        batch_size: Optional[int] = None,
    ) -> DATASET_TYPE:
        """Sample, rename, prepare and map the dataset through `batched_preprocess`."""
        from ..utils import sample_dataset
        if batch_size is None:
            # Streaming datasets use a smaller batch to limit memory.
            batch_size = 1000 if isinstance(dataset, HfDataset) else 16
        if self.dataset_sample is not None:
            dataset = sample_dataset(dataset, self.dataset_sample, True, self.random_state)

        map_kwargs = {'batched': True, 'batch_size': batch_size}
        if isinstance(dataset, HfDataset):
            if not load_from_cache_file and is_dist() and not is_master():
                # Non-master ranks must reuse the cache written by the master.
                load_from_cache_file = True
            map_kwargs.update({
                'num_proc': num_proc,
                'load_from_cache_file': load_from_cache_file,
            })
        # compat GRPO: The solution field will be retained.
        dataset = RowPreprocessor.get_features_dataset(dataset)
        if 'solution' in dataset.features:
            with safe_ddp_context(None, True):
                dataset = dataset.map(lambda x: {'__#solution': x['solution']}, **map_kwargs)
        dataset = self._rename_columns(dataset)
        dataset = self.prepare_dataset(dataset)
        dataset = self._cast_pil_image(dataset)

        # With multiple workers, MaxLengthError cannot be surfaced per-row; ignore it.
        ignore_max_length_error = True if isinstance(dataset, HfDataset) and num_proc > 1 else False
        with self._patch_arrow_writer(), safe_ddp_context(None, True):
            try:
                dataset_mapped = dataset.map(
                    self.batched_preprocess,
                    fn_kwargs={
                        'strict': strict,
                        'ignore_max_length_error': ignore_max_length_error
                    },
                    remove_columns=list(dataset.features.keys()),
                    **map_kwargs)
            except NotImplementedError:
                # NOTE(review): if this branch is taken, `dataset_mapped` is never
                # bound and the statements below raise NameError — confirm intent.
                pass
        if isinstance(dataset_mapped, HfDataset) and len(dataset) != len(dataset_mapped):
            logger.info(
                f'Dataset filtered, origin length: {len(dataset)}, filtered dataset length: {len(dataset_mapped)}')

        return dataset_mapped
328
+
329
+
330
class ResponsePreprocessor(RowPreprocessor):
    """Dataset compatible with older versions of ms-swift"""

    def __init__(self, *, columns: Optional[Dict[str, str]] = None, **kwargs) -> None:
        super().__init__(columns=columns, **kwargs)
        # Common aliases for the system/query/response columns; all are folded
        # onto the three standard names via the column-rename machinery.
        system_keys = ['system', 'system_prompt']
        query_keys = ['query', 'prompt', 'input', 'instruction', 'question', 'problem']
        response_keys = ['response', 'answer', 'output', 'targets', 'target', 'answer_key', 'answers', 'solution'
                         ] + ['text', 'completion', 'content']
        for key in system_keys:
            self.columns[key] = 'system'
        for key in query_keys:
            self.columns[key] = 'query'
        for key in response_keys:
            self.columns[key] = 'response'

    def preprocess(self, row: Dict[str, Any]) -> Optional[Dict[str, Any]]:
        """Fold query/response/history/system into a standard `messages` list."""
        response = row.pop('response', None)
        if response is not None:
            if isinstance(response, (list, tuple)):
                from transformers.utils import strtobool
                # sometimes response is a list, pick one randomly
                if strtobool(os.environ.get('RANDOM_DATASET_RESPONSE', 'True')):
                    response = self.random_state.choice(response)
                else:
                    response = response[0]
        history = row.pop('history', None) or []
        query = row.pop('query', None)
        system = row.pop('system', None)
        if isinstance(history, str):  # e.g. "[['query1', 'response1']]"
            history = ast.literal_eval(history)
        # The current turn is appended as the last history entry before conversion.
        history.append([query, response])

        row.update({'messages': history_to_messages(history, system)})
        return row
365
+
366
+
367
class AlpacaPreprocessor(ResponsePreprocessor):
    """Convert Alpaca-format rows (instruction/input/output) to query/response form."""

    @classmethod
    def concat_inst_input(cls, instruction, input_):
        """Join instruction and input with a newline; fall back to whichever is non-empty."""
        if instruction and input_:
            query = f'{instruction}\n{input_}'
        else:
            query = instruction or input_
        assert isinstance(query, str), f'query: {query}'
        return query

    def preprocess(self, row: Dict[str, Any]) -> Optional[Dict[str, Any]]:
        instruction = row.pop('instruction', None)
        input_ = row.pop('input', None)
        output = row.pop('output', None)
        if output is not None:
            row['response'] = output
        row['query'] = self.concat_inst_input(instruction, input_)
        return super().preprocess(row)
387
+
388
def default_repair_messages(s: Union[str, Any]) -> Any:
    """Parse a stringified Python literal (e.g. "[{'from': ...}]"); pass other values through."""
    if not isinstance(s, str):
        return s
    return ast.literal_eval(s)
392
+
393
+
394
class MessagesPreprocessor(RowPreprocessor):
    """Normalize conversation rows (messages / ShareGPT style) to the standard schema.

    Handles both role/content message lists and ShareGPT-style
    {'user': ..., 'assistant': ...} turn dicts, mapping the various role and
    key aliases onto the standard 'role'/'content' names.
    """

    def __init__(
            self,
            *,
            # If set to None, automatic matching will be performed.
            role_key: Optional[str] = None,  # 'role', 'from'
            content_key: Optional[str] = None,  # 'content', 'value'
            user_role: Optional[str] = None,  # 'user', 'human'
            assistant_role: Optional[str] = None,  # 'assistant', 'gpt', 'bot'
            system_role: str = 'system',
            # 'conversation', 'conversations' -> 'messages'
            columns: Optional[Dict[str, str]] = None,
            repair_messages: Callable[[Union[str, List[Dict[str, str]]]],
                                      Optional[List[Dict[str, str]]]] = default_repair_messages,
            inner_key: Optional[str] = None,
            **kwargs):
        """
        Args:
            role_key/content_key: Explicit key names; None tries the common aliases.
            user_role/assistant_role: Explicit role names; None tries the common aliases.
            system_role: Role name treated as the system message.
            repair_messages: Callable to repair/parse the raw messages value.
            inner_key: When set, the messages list is nested under this key.
        """
        super().__init__(columns=columns, **kwargs)
        self.role_keys = ['role', 'from'] if role_key is None else [role_key]
        self.content_keys = ['content', 'value'] if content_key is None else [content_key]
        self.user_roles = ['user', 'human'] if user_role is None else [user_role]
        self.assistant_roles = ['assistant', 'gpt', 'bot'] if assistant_role is None else [assistant_role]
        self.tool_call_roles = ['function_call']
        self.tool_response_roles = ['function_response', 'observation', 'observations']

        self.system_role = system_role
        self.repair_messages = repair_messages
        self.inner_key = inner_key

        message_keys = ['messages', 'conversation', 'conversations']
        for key in message_keys:
            self.columns[key] = 'messages'
        # sharegptq
        system_keys = ['system', 'system_prompt']
        if system_role not in system_keys:
            system_keys.append(system_role)
        for key in system_keys:
            self.columns[key] = 'system'

    @staticmethod
    def _is_sharegpt_format(message: Dict[str, str]) -> bool:
        # ShareGPT turns have no 'role'/'content' keys; they map role name -> text.
        if 'role' in message or 'content' in message:
            return False
        return True

    def sharegpt_to_messages(self, messages: List[Dict[str, str]], system: Optional[str]) -> List[Dict[str, str]]:
        """Expand ShareGPT turn dicts into alternating user/assistant messages."""
        self._to_std_key(messages, 'user', self.user_roles)
        self._to_std_key(messages, 'assistant', self.assistant_roles)
        new_messages = []
        if system is not None:
            new_messages.append({'role': 'system', 'content': system})
        for message in messages:
            user_message = {'role': 'user', 'content': message['user']}
            assistant_message = {'role': 'assistant', 'content': message['assistant']}
            new_messages.append(user_message)
            new_messages.append(assistant_message)
        return new_messages

    def to_std_messages(self, messages: List[Dict[str, str]], system: Optional[str]) -> None:
        """In place: normalize role names and prepend the system message if needed."""
        if messages[0]['role'] == self.system_role:
            messages[0]['role'] = 'system'
        elif system is not None:
            messages.insert(0, {'role': 'system', 'content': system})
        for message in messages:
            role = message['role']
            if role in self.user_roles:
                message['role'] = 'user'
            elif role in self.assistant_roles:
                message['role'] = 'assistant'
            elif role.replace('-', '_') in self.tool_call_roles:
                message['role'] = 'tool_call'
            elif role.replace('-', '_') in self.tool_response_roles:
                message['role'] = 'tool_response'

    @staticmethod
    def _to_std_key(messages: List[Dict[str, str]], std_key: str, optional_keys: List[str]) -> None:
        """In place: rename the first matching alias key of each message to `std_key`."""
        for message in messages:
            for key in optional_keys:
                if key in message:
                    message[std_key] = message.pop(key)

    def preprocess(self, row: Dict[str, Any]) -> Optional[Dict[str, Any]]:
        """Repair, unnest and normalize the row's messages; return None to drop the row."""
        if 'rejected_messages' in row:
            # Normalize the rejected conversation with the same pipeline.
            row['rejected_messages'] = MessagesPreprocessor.preprocess(
                self, {'messages': row['rejected_messages']})['messages']
        messages = row['messages']
        if self.inner_key is not None:
            messages = messages[self.inner_key]
        messages: Optional[List[Dict[str, str]]] = self.repair_messages(messages)
        if not messages or isinstance(messages, str):
            return
        self._to_std_key(messages, 'role', self.role_keys)
        self._to_std_key(messages, 'content', self.content_keys)
        system = row.pop('system', None)
        if self._is_sharegpt_format(messages[0]):
            messages = self.sharegpt_to_messages(messages, system)
        else:
            self.to_std_messages(messages, system)  # inplace
        row['messages'] = messages
        return row
494
+
495
+
496
class ClsPreprocessor(ResponsePreprocessor):
    """Response preprocessing plus coercion of the classification label to int."""

    def preprocess(self, row: Dict[str, Any]) -> Optional[Dict[str, Any]]:
        processed = super().preprocess(row)
        processed['label'] = int(processed['label'])
        return processed
502
+
503
+
504
class AutoPreprocessor:
    """Dispatch to the preprocessor matching the dataset's schema.

    Conversation-style columns select MessagesPreprocessor; Alpaca-style
    instruction/input columns select AlpacaPreprocessor; everything else falls
    back to the query/response compatibility ResponsePreprocessor.
    """

    def __init__(self, *, columns: Optional[Dict[str, str]] = None, **kwargs) -> None:
        self.columns = columns or {}
        self.kwargs = kwargs

    def _get_preprocessor(self, dataset: DATASET_TYPE) -> RowPreprocessor:
        """Inspect dataset features and build the appropriate row preprocessor."""
        features = dataset.features
        if any(key in features for key in ('conversation', 'conversations', 'messages')):
            return MessagesPreprocessor(**self.kwargs)
        if 'instruction' in features and 'input' in features:
            return AlpacaPreprocessor(**self.kwargs)
        return ResponsePreprocessor(**self.kwargs)

    def __call__(
        self,
        dataset: DATASET_TYPE,
        *,
        num_proc: int = 1,
        load_from_cache_file: bool = True,
        strict: bool = False,
    ) -> DATASET_TYPE:
        renamed = RowPreprocessor.safe_rename_columns(dataset, self.columns)
        preprocessor = self._get_preprocessor(renamed)
        return preprocessor(renamed, num_proc=num_proc, load_from_cache_file=load_from_cache_file, strict=strict)
ms-swift/swift/llm/ds_config/zero2_offload.json ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "fp16": {
3
+ "enabled": "auto",
4
+ "loss_scale": 0,
5
+ "loss_scale_window": 1000,
6
+ "initial_scale_power": 16,
7
+ "hysteresis": 2,
8
+ "min_loss_scale": 1
9
+ },
10
+
11
+ "bf16": {
12
+ "enabled": "auto"
13
+ },
14
+
15
+ "zero_optimization": {
16
+ "stage": 2,
17
+ "offload_optimizer": {
18
+ "device": "cpu",
19
+ "pin_memory": true
20
+ },
21
+ "allgather_partitions": true,
22
+ "allgather_bucket_size": 2e8,
23
+ "overlap_comm": false,
24
+ "reduce_scatter": true,
25
+ "reduce_bucket_size": 2e8,
26
+ "contiguous_gradients": true
27
+ },
28
+
29
+ "gradient_accumulation_steps": "auto",
30
+ "gradient_clipping": "auto",
31
+ "steps_per_print": 2000,
32
+ "train_batch_size": "auto",
33
+ "train_micro_batch_size_per_gpu": "auto",
34
+ "wall_clock_breakdown": false
35
+ }
ms-swift/swift/llm/export/merge_lora.py ADDED
@@ -0,0 +1,44 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) Alibaba, Inc. and its affiliates.
2
+ import os
3
+
4
+ from swift.llm import ExportArguments, prepare_model_template, save_checkpoint
5
+ from swift.tuners import Swift
6
+ from swift.utils import get_logger
7
+
8
+ logger = get_logger()
9
+
10
+
11
def merge_lora(args: ExportArguments, device_map=None, replace_if_exists=False) -> None:
    """Merge LoRA adapter weights into their base model and save the result.

    The merged checkpoint goes to ``args.output_dir`` when set, otherwise to
    ``'<first adapter>-merged'``. If that directory already exists and
    ``replace_if_exists`` is False, merging is skipped. In every case ``args``
    is mutated afterwards so that ``args.model``/``args.model_dir`` point at
    the merged directory and ``args.adapters`` is cleared.

    Args:
        args: Export configuration; read for adapters/device map and mutated
            in place at the end.
        device_map: Optional device map used only while merging; the original
            ``args.device_map`` is restored once saving finishes.
        replace_if_exists: Re-run the merge even when the output exists.
    """
    if replace_if_exists:
        logger.info(f'replace_if_exists: {replace_if_exists}')
    output_dir = getattr(args, 'output_dir', None) or f'{args.adapters[0]}-merged'
    if os.path.exists(output_dir) and not replace_if_exists:
        logger.info(f'The weight directory for the merged LoRA already exists in {output_dir}, '
                    'skipping the saving process.')
    else:
        # Temporarily override the device map for the merge; restore it below.
        saved_device_map = args.device_map
        if device_map:
            args.device_map = device_map
        logger.info(f'merge_device_map: {device_map}')
        model, template = prepare_model_template(args)
        quant_method = model.model_info.quant_method
        assert quant_method is None, (f'quant_method: {quant_method}, '
                                      'quantized model and does not support merge-lora.')
        logger.info('Merge LoRA...')
        Swift.merge_and_unload(model)
        # Unwrap to the inner (merged) model before saving.
        model = model.model
        logger.info('Saving merged weights...')
        save_checkpoint(
            model,
            template.processor,
            output_dir,
            safe_serialization=args.safe_serialization,
            model_dirs=args.adapters,
            max_shard_size=args.max_shard_size,
            additional_saved_files=model.model_meta.additional_saved_files)
        logger.info(f'Successfully merged LoRA and saved in {output_dir}.')
        args.device_map = saved_device_map

    # Point any subsequent pipeline steps at the merged checkpoint.
    args.model = output_dir
    args.model_dir = output_dir
    args.adapters = []
ms-swift/swift/llm/export/quant.py ADDED
@@ -0,0 +1,266 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) Alibaba, Inc. and its affiliates.
2
+ from collections import defaultdict
3
+ from contextlib import contextmanager
4
+ from typing import Dict, List, Optional
5
+
6
+ import torch
7
+ import torch.nn as nn
8
+ from tqdm import tqdm
9
+
10
+ from swift.llm import (ExportArguments, HfConfigFactory, MaxLengthError, ProcessorMixin, deep_getattr, get_model_arch,
11
+ is_moe_model, load_dataset, prepare_model_template, save_checkpoint, to_device)
12
+ from swift.utils import find_layers, get_logger, get_model_parameter_info
13
+
14
+ logger = get_logger()
15
+
16
+
17
class QuantEngine(ProcessorMixin):
    """Drives post-training quantization (AWQ / GPTQ / bnb) for an export run.

    The engine loads the model/template from ``ExportArguments``, builds a
    calibration dataset from the configured training data (for awq/gptq),
    runs the chosen quantization backend, and saves the quantized checkpoint
    plus processor files to ``args.output_dir``.
    """

    def __init__(self, args: ExportArguments):
        self.args = args
        kwargs = {}
        if args.quant_method == 'awq':
            # AWQ needs its own AutoModel class so that `self.model` exposes
            # `quantize()` / `save_quantized()`.
            from awq import AutoAWQForCausalLM
            kwargs['automodel_class'] = AutoAWQForCausalLM
        self.model, self.template = prepare_model_template(args, **kwargs)
        # 'train' mode so that template.encode produces labeled samples for calibration.
        self.template.set_mode('train')
        # Disable the KV cache both on the top-level config and (via the
        # factory helper) on any nested sub-configs.
        self.model.config.use_cache = False
        HfConfigFactory.set_model_config_attr(self.model, 'use_cache', False)
        self.processor = self.template.processor
        args.save_args()

    def quantize(self):
        """Run the configured quantization backend and save the result.

        Raises:
            ValueError: if ``quant_bits`` is unset or ``quant_method`` is not
                one of 'awq', 'gptq', 'bnb'.
        """
        args = self.args
        if args.quant_bits is None:
            raise ValueError(f'Please set the quant_bits. args.quant_bits: {args.quant_bits}')
        if args.quant_method == 'awq':
            # For AWQ, `self.model` is the AWQ wrapper; the template must see
            # the inner HF model.
            self.template.model = self.model.model
            self.awq_model_quantize()
            self.model.save_quantized(
                args.output_dir, safetensors=args.safe_serialization, shard_size=args.max_shard_size)
        elif args.quant_method == 'gptq':
            self.template.model = self.model
            gptq_quantizer = self.gptq_model_quantize()
            gptq_quantizer.save(
                self.model,
                args.output_dir,
                safe_serialization=args.safe_serialization,
                max_shard_size=args.max_shard_size)
        elif args.quant_method == 'bnb':
            # bnb models are already quantized at load time; just re-save.
            self.model.save_pretrained(
                args.output_dir, safe_serialization=args.safe_serialization, max_shard_size=args.max_shard_size)
        else:
            raise ValueError(f'args.quant_method: {args.quant_method}')

        logger.info(f'model: {self.model}')
        logger.info(f'model_parameter_info: {get_model_parameter_info(self.model)}')
        # model=None: weights were already written above; this only copies the
        # processor/config side files into the output directory.
        save_checkpoint(
            None,
            self.processor,
            args.output_dir,
            model_dirs=[args.model_dir],
            additional_saved_files=self.model.model_meta.additional_saved_files)
        logger.info(f'Successfully quantized the model and saved in {args.output_dir}.')

    @torch.inference_mode()
    def _prepare_gptq_dataset(self, examples: List[Dict[str, torch.LongTensor]], batch_size: int = 1, *args, **kwargs):
        """Collate calibration examples into batches for GPTQ (replaces
        optimum's `prepare_dataset` via the `_patch_gptq` context manager)."""
        res = []
        for start in tqdm(range(0, len(examples), batch_size)):
            batched_inputs = examples[start:start + batch_size]
            inputs = to_device(self.template.data_collator(batched_inputs), self.model.device)
            if self.model.model_meta.is_multimodal:
                # Multimodal templates inject vision features in the pre-forward hook.
                _, inputs = self.template.pre_forward_hook(self.model, None, inputs)
            # Keep prepared batches on CPU until the quantizer moves them.
            res.append(to_device(inputs, 'cpu'))
        return res

    @torch.inference_mode()
    def _get_quant_dataset(self, *args, **kwargs):
        """Build the calibration dataset (replaces awq/optimum's
        `get_calib_dataset`/`get_dataset` while patched in).

        For multimodal+gptq, returns a list of per-sample encoded dicts.
        Otherwise, token ids of all samples are concatenated and re-split
        into fixed `max_length` blocks.
        """
        args = self.args
        assert args.quant_method in {'awq', 'gptq'}
        template = self.template
        n_samples = args.quant_n_samples
        block_size = args.max_length

        # only use train_dataset
        dataset = load_dataset(
            args.dataset, split_dataset_ratio=0, shuffle=args.dataset_shuffle, **args.get_dataset_kwargs())[0]
        logger.info(f'quant_dataset: {dataset}')
        dataset = dataset.shuffle()

        samples = []
        i = 0
        prog_bar = tqdm(total=n_samples, dynamic_ncols=True)
        is_multimodal = self.model.model_meta.is_multimodal
        for data in dataset:
            try:
                inputs = template.encode(data)
            except MaxLengthError:
                # Skip rows that exceed max_length rather than truncating.
                continue
            if is_multimodal and args.quant_method == 'gptq':
                inputs.pop('labels', None)
                samples.append(inputs)
            else:
                input_ids = inputs['input_ids']
                # Note: `+=` extends, so all token ids end up in one flat list.
                samples += input_ids
            i += 1
            prog_bar.update()
            if i == n_samples:
                break
        if is_multimodal and args.quant_method == 'gptq':
            return samples
        # now concatenate all samples and split according to block size
        n_split = len(samples) // block_size
        logger.info(f'Split into {n_split} blocks')
        res = []
        for i in range(n_split):
            input_ids = samples[i * block_size:(i + 1) * block_size]
            if args.quant_method == 'gptq':
                res.append({'input_ids': input_ids})
            else:
                # AWQ expects a batched LongTensor of shape (1, block_size).
                res.append(torch.tensor(input_ids)[None])
        return res

    @staticmethod
    @contextmanager
    def _patch_awq_move_embed(awq_model):
        """Temporarily wrap `awq_model.move_embed` so embeddings managed by an
        accelerate hook (`_hf_hook`) are not moved off-CPU during quantization."""
        _origin_move_embed = awq_model.move_embed

        def _move_embed(model, device: str):
            if hasattr(model, '_hf_hook') and device != 'cpu':
                return
            _origin_move_embed(model, device)

        awq_model.move_embed = _move_embed
        try:
            yield
        finally:
            awq_model.move_embed = _origin_move_embed

    def get_awq_modules_to_not_convert(self):
        """Return names of Linear layers whose out_features equals the expert
        count (i.e. MoE router/gate layers), which AWQ should skip."""
        block_name = self.get_block_name_to_quantize(self.model)
        block = deep_getattr(self.model, block_name)[-1]
        prefix, experts = self._get_experts(block)
        num_experts = len(experts)

        def cond(name, module):
            if isinstance(module, nn.Linear) and module.out_features == num_experts:
                return True
            return False

        return find_layers(self.model, cond, min_name_len=2)  # min_name_len: fix Qwen3-MoE

    def awq_model_quantize(self) -> None:
        """Quantize `self.model` with AutoAWQ, substituting our calibration
        dataset builder for AWQ's default one while quantizing."""
        from awq.quantize import quantizer
        # NOTE(review): AwqConfig appears unused in this method — confirm
        # whether the import is needed for a registration side effect.
        from transformers import AwqConfig

        args = self.args
        logger.info(f'Quantization dataset: {args.dataset}')
        _origin_get_calib_dataset = quantizer.get_calib_dataset
        quantizer.get_calib_dataset = self._get_quant_dataset
        quant_config = {
            'zero_point': True,
            'q_group_size': args.group_size,
            'w_bit': args.quant_bits,
            'version': 'GEMM'
        }
        if is_moe_model(self.model):
            # Keep MoE gate layers unquantized.
            quant_config['modules_to_not_convert'] = self.get_awq_modules_to_not_convert()
        logger.info(f'quant_config: {quant_config}')
        logger.info('Start quantizing the model...')
        with self._patch_awq_move_embed(self.model):
            self.model.quantize(
                self.tokenizer, quant_config=quant_config, n_parallel_calib_samples=args.quant_batch_size)
        quantizer.get_calib_dataset = _origin_get_calib_dataset  # recover
        if self.model.quant_config.modules_to_not_convert:
            # Also exclude the LM head (name resolved from the model arch).
            model_arch = get_model_arch(args.model_meta.model_arch)
            lm_head_key = model_arch.lm_head or 'lm_head'
            self.model.quant_config.modules_to_not_convert.append(lm_head_key)

    @contextmanager
    def _patch_gptq(self):
        """Temporarily replace optimum's dataset helpers with our own so GPTQ
        calibrates on the template-encoded dataset; always restores on exit."""
        from optimum.gptq import quantizer
        _get_dataset_origin = quantizer.get_dataset
        _prepare_dataset_origin = quantizer.prepare_dataset
        quantizer.get_dataset = self._get_quant_dataset
        quantizer.prepare_dataset = self._prepare_gptq_dataset
        try:
            yield
        finally:
            quantizer.get_dataset = _get_dataset_origin
            quantizer.prepare_dataset = _prepare_dataset_origin

    @staticmethod
    def get_block_name_to_quantize(model: nn.Module) -> Optional[str]:
        """Heuristically find the attribute path of the transformer layer
        stack: the largest ModuleList/Sequential with >= 10 entries whose
        first element is not an MLP (excludes MoE expert lists).

        Returns None if no such container is found.
        """
        model_arch = get_model_arch(model.model_meta.model_arch)
        prefix = ''
        if hasattr(model_arch, 'language_model'):
            # For multimodal models, descend into the language sub-model first.
            assert len(model_arch.language_model) == 1, f'mllm_arch.language_model: {model_arch.language_model}'
            prefix = model_arch.language_model[0]
            model = deep_getattr(model, prefix)

        module_lists = []
        for n, m in model.named_modules():
            if (isinstance(m, (nn.ModuleList, nn.Sequential)) and len(m) >= 10
                    and 'mlp' not in m[0].__class__.__name__.lower()):  # fix moe
                module_lists.append((n, m))
        if module_lists:
            module_list = max(module_lists, key=lambda x: len(x[1]))
            # strip('.') handles the empty-prefix (non-multimodal) case.
            return f'{prefix}.{module_list[0]}'.strip('.')

    @staticmethod
    def _get_experts(block):
        """Return (name, container) of the first ModuleList/Sequential inside
        `block` — presumably the MoE expert list. Returns None if absent."""
        for n, m in block.named_modules():
            if isinstance(m, (nn.ModuleList, nn.Sequential)):
                return n, m

    @staticmethod
    def get_modules_in_block_to_quantize(model, block_name: str):
        """For MoE models, build optimum's `modules_in_block_to_quantize` list:
        expert layers grouped by suffix, gate/router layers excluded.

        Returns None for non-MoE models (optimum then uses its default).
        """
        if not is_moe_model(model):
            return
        from optimum.gptq.utils import get_layers
        # Do not quantize the gate part.
        block = deep_getattr(model, block_name)[-1]
        prefix, experts = QuantEngine._get_experts(block)
        num_experts = len(experts)

        layers = get_layers(block)
        res = []
        experts = defaultdict(list)
        experts_idx = None
        for name, layer in layers.items():
            if name.startswith(prefix):
                # Group parallel expert layers by their trailing name (e.g. 'gate_proj').
                suffix = name.rsplit('.', 1)[-1]
                experts[suffix].append(name)
                experts_idx = len(res)
            elif layer.out_features not in {1, num_experts}:
                # Skip router/gate-like layers (out_features == num_experts or 1).
                res.append([name])
        # Splice the expert groups back in at the position where they appeared.
        res[experts_idx:experts_idx] = experts.values()
        return res

    def gptq_model_quantize(self):
        """Quantize `self.model` via optimum's GPTQQuantizer and return the
        quantizer (the caller uses it to save the model)."""
        from optimum.gptq import GPTQQuantizer
        args = self.args
        logger.info(f'Quantization dataset: {args.dataset}')
        block_name_to_quantize = self.get_block_name_to_quantize(self.model)
        modules_in_block_to_quantize = self.get_modules_in_block_to_quantize(self.model, block_name_to_quantize)
        logger.info(f'block_name_to_quantize: {block_name_to_quantize}')
        logger.info(f'modules_in_block_to_quantize: {modules_in_block_to_quantize}')
        # The dataset patches must stay active for the whole quantize_model call.
        with self._patch_gptq():
            gptq_quantizer = GPTQQuantizer(
                bits=args.quant_bits,
                group_size=args.group_size,
                dataset=','.join(args.dataset),
                batch_size=args.quant_batch_size,
                block_name_to_quantize=block_name_to_quantize,
                modules_in_block_to_quantize=modules_in_block_to_quantize)
            # Persist our custom block name into the saved quantization config.
            gptq_quantizer.serialization_keys.append('block_name_to_quantize')
            logger.info('Start quantizing the model...')
            logger.warning('The process of packing the model takes a long time and there is no progress bar. '
                           'Please be patient and wait...')
            gptq_quantizer.quantize_model(self.model, self.tokenizer)
            # Drop the (non-serializable) dataset entry from the saved config.
            self.model.config.quantization_config.pop('dataset', None)
        return gptq_quantizer
263
+
264
+
265
def quantize_model(args: ExportArguments):
    """Entry point: build a QuantEngine from `args` and run quantization."""
    engine = QuantEngine(args)
    engine.quantize()
ms-swift/swift/llm/infer/__pycache__/__init__.cpython-310.pyc ADDED
Binary file (1.09 kB). View file
 
ms-swift/swift/llm/infer/__pycache__/protocol.cpython-310.pyc ADDED
Binary file (14.2 kB). View file
 
ms-swift/swift/llm/infer/infer_engine/__init__.py ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Copyright (c) Alibaba, Inc. and its affiliates.
# Lazy-loading package init: heavy engine backends (vllm, lmdeploy, torch)
# are only imported when their attributes are first accessed.
from typing import TYPE_CHECKING

from swift.utils.import_utils import _LazyModule

if TYPE_CHECKING:
    # Static imports for type checkers / IDEs only; at runtime the submodules
    # are resolved lazily through _LazyModule below.
    from .vllm_engine import VllmEngine
    from .grpo_vllm_engine import GRPOVllmEngine
    from .lmdeploy_engine import LmdeployEngine
    from .pt_engine import PtEngine
    from .infer_client import InferClient
    from .infer_engine import InferEngine
    from .base import BaseInferEngine
    from .utils import prepare_generation_config, AdapterRequest, set_device_context, patch_vllm_memory_leak
else:
    # Maps submodule name -> public names it exports; must mirror the
    # TYPE_CHECKING imports above.
    _import_structure = {
        'vllm_engine': ['VllmEngine'],
        'grpo_vllm_engine': ['GRPOVllmEngine'],
        'lmdeploy_engine': ['LmdeployEngine'],
        'pt_engine': ['PtEngine'],
        'infer_client': ['InferClient'],
        'infer_engine': ['InferEngine'],
        'base': ['BaseInferEngine'],
        'utils': ['prepare_generation_config', 'AdapterRequest', 'set_device_context', 'patch_vllm_memory_leak'],
    }

    import sys

    # Replace this module object with a _LazyModule that defers submodule
    # imports until attribute access.
    sys.modules[__name__] = _LazyModule(
        __name__,
        globals()['__file__'],
        _import_structure,
        module_spec=__spec__,
        extra_objects={},
    )