Add files using upload-large-folder tool
Browse filesThis view is limited to 50 files because it contains too many changes.
See raw diff
- ms-swift/.ipynb_checkpoints/requirements-checkpoint.txt +1 -0
- ms-swift/processed_data/processed_overlap5s_isoverlap.json +0 -0
- ms-swift/processed_data/processed_overlap5s_issilence.json +0 -0
- ms-swift/processed_data/processed_overlap5s_transcriptions.json +0 -0
- ms-swift/processed_data/processed_silence_issilence.json +0 -0
- ms-swift/silence_overlaps/700/original/overlap5s_speaker_segments.json +0 -0
- ms-swift/silence_overlaps/700/original/overlap5s_transcriptions.json +0 -0
- ms-swift/silence_overlaps/700/original/silence_issilence.json +0 -0
- ms-swift/silence_overlaps/700/original/silence_speaker_segments.json +0 -0
- ms-swift/silence_overlaps/700/original/silence_transcriptions.json +0 -0
- ms-swift/silence_overlaps/700/silence_isoverlap_train.json +1152 -0
- ms-swift/silence_overlaps/700/test/.ipynb_checkpoints/silence_isoverlaps_test-checkpoint.json +27 -0
- ms-swift/silence_overlaps/700/test/overlap5s_transcriptions_test.json +27 -0
- ms-swift/silence_overlaps/700/test/silence_isoverlaps_test.json +27 -0
- ms-swift/silence_overlaps/700/test/silence_issilence_test.json +27 -0
- ms-swift/silence_overlaps/700/train/.ipynb_checkpoints/silence_speaker_segments_train-checkpoint.json +0 -0
- ms-swift/silence_overlaps/700/train/silence_transcriptions_train.json +0 -0
- ms-swift/silence_overlaps/delete_transcript2.json +1 -0
- ms-swift/silence_overlaps/only_overlap/overlap5s_isoverlap_train.json +0 -0
- ms-swift/silence_overlaps/overlap5s_issilence.json +0 -0
- ms-swift/silence_overlaps/silence_transcriptions.json +0 -0
- ms-swift/silence_overlaps/test/.ipynb_checkpoints/test-checkpoint.json +566 -0
- ms-swift/silence_overlaps/test/test.json +566 -0
- ms-swift/swift/llm/dataset/__init__.py +35 -0
- ms-swift/swift/llm/dataset/__pycache__/loader.cpython-310.pyc +0 -0
- ms-swift/swift/llm/dataset/__pycache__/media.cpython-310.pyc +0 -0
- ms-swift/swift/llm/dataset/dataset/__init__.py +2 -0
- ms-swift/swift/llm/dataset/dataset/__pycache__/llm.cpython-310.pyc +0 -0
- ms-swift/swift/llm/dataset/preprocessor/extra.py +112 -0
- ms-swift/swift/llm/ds_config/zero2.json +35 -0
- ms-swift/swift/llm/ds_config/zero3.json +44 -0
- ms-swift/swift/llm/ds_config/zero3_offload.json +42 -0
- ms-swift/swift/llm/eval/__init__.py +2 -0
- ms-swift/swift/llm/eval/utils.py +53 -0
- ms-swift/swift/llm/export/__init__.py +5 -0
- ms-swift/swift/llm/infer/__init__.py +35 -0
- ms-swift/swift/llm/infer/infer_engine/__pycache__/utils.cpython-310.pyc +0 -0
- ms-swift/swift/llm/model/__pycache__/model_arch.cpython-310.pyc +0 -0
- ms-swift/swift/llm/model/model/__pycache__/gemma.cpython-310.pyc +0 -0
- ms-swift/swift/llm/model/model/__pycache__/glm.cpython-310.pyc +0 -0
- ms-swift/swift/llm/model/model/__pycache__/llama.cpython-310.pyc +0 -0
- ms-swift/swift/llm/model/model/__pycache__/llava.cpython-310.pyc +0 -0
- ms-swift/swift/llm/model/model/__pycache__/mistral.cpython-310.pyc +0 -0
- ms-swift/swift/llm/model/model/baichuan.py +147 -0
- ms-swift/swift/llm/model/model/mamba.py +41 -0
- ms-swift/swift/llm/model/model/stepfun.py +86 -0
- ms-swift/swift/llm/model/model/telechat.py +59 -0
- ms-swift/swift/llm/model/model/valley.py +82 -0
- ms-swift/swift/llm/model/patcher.py +363 -0
- ms-swift/swift/llm/sampling/distill_sampler.py +148 -0
ms-swift/.ipynb_checkpoints/requirements-checkpoint.txt
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
-r requirements/framework.txt
|
ms-swift/processed_data/processed_overlap5s_isoverlap.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
ms-swift/processed_data/processed_overlap5s_issilence.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
ms-swift/processed_data/processed_overlap5s_transcriptions.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
ms-swift/processed_data/processed_silence_issilence.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
ms-swift/silence_overlaps/700/original/overlap5s_speaker_segments.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
ms-swift/silence_overlaps/700/original/overlap5s_transcriptions.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
ms-swift/silence_overlaps/700/original/silence_issilence.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
ms-swift/silence_overlaps/700/original/silence_speaker_segments.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
ms-swift/silence_overlaps/700/original/silence_transcriptions.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
ms-swift/silence_overlaps/700/silence_isoverlap_train.json
ADDED
|
@@ -0,0 +1,1152 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[
|
| 2 |
+
{
|
| 3 |
+
"audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--1101857.wav",
|
| 4 |
+
"key": "SODA_PROCESSED--train--1101857",
|
| 5 |
+
"model_output": "No significant overlaps found."
|
| 6 |
+
},
|
| 7 |
+
{
|
| 8 |
+
"audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--508884.wav",
|
| 9 |
+
"key": "SODA_PROCESSED--train--508884",
|
| 10 |
+
"model_output": "No significant overlaps found."
|
| 11 |
+
},
|
| 12 |
+
{
|
| 13 |
+
"audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--1113674.wav",
|
| 14 |
+
"key": "SODA_PROCESSED--train--1113674",
|
| 15 |
+
"model_output": "No significant overlaps found."
|
| 16 |
+
},
|
| 17 |
+
{
|
| 18 |
+
"audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--158293.wav",
|
| 19 |
+
"key": "SODA_PROCESSED--train--158293",
|
| 20 |
+
"model_output": "No significant overlaps found."
|
| 21 |
+
},
|
| 22 |
+
{
|
| 23 |
+
"audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--631363.wav",
|
| 24 |
+
"key": "SODA_PROCESSED--train--631363",
|
| 25 |
+
"model_output": "No significant overlaps found."
|
| 26 |
+
},
|
| 27 |
+
{
|
| 28 |
+
"audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--277322.wav",
|
| 29 |
+
"key": "SODA_PROCESSED--train--277322",
|
| 30 |
+
"model_output": "No significant overlaps found."
|
| 31 |
+
},
|
| 32 |
+
{
|
| 33 |
+
"audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--1131940.wav",
|
| 34 |
+
"key": "SODA_PROCESSED--train--1131940",
|
| 35 |
+
"model_output": "No significant overlaps found."
|
| 36 |
+
},
|
| 37 |
+
{
|
| 38 |
+
"audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--1108753.wav",
|
| 39 |
+
"key": "SODA_PROCESSED--train--1108753",
|
| 40 |
+
"model_output": "No significant overlaps found."
|
| 41 |
+
},
|
| 42 |
+
{
|
| 43 |
+
"audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--27924.wav",
|
| 44 |
+
"key": "SODA_PROCESSED--train--27924",
|
| 45 |
+
"model_output": "No significant overlaps found."
|
| 46 |
+
},
|
| 47 |
+
{
|
| 48 |
+
"audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--374749.wav",
|
| 49 |
+
"key": "SODA_PROCESSED--train--374749",
|
| 50 |
+
"model_output": "No significant overlaps found."
|
| 51 |
+
},
|
| 52 |
+
{
|
| 53 |
+
"audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--821468.wav",
|
| 54 |
+
"key": "SODA_PROCESSED--train--821468",
|
| 55 |
+
"model_output": "No significant overlaps found."
|
| 56 |
+
},
|
| 57 |
+
{
|
| 58 |
+
"audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--416516.wav",
|
| 59 |
+
"key": "SODA_PROCESSED--train--416516",
|
| 60 |
+
"model_output": "No significant overlaps found."
|
| 61 |
+
},
|
| 62 |
+
{
|
| 63 |
+
"audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--1029082.wav",
|
| 64 |
+
"key": "SODA_PROCESSED--train--1029082",
|
| 65 |
+
"model_output": "No significant overlaps found."
|
| 66 |
+
},
|
| 67 |
+
{
|
| 68 |
+
"audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--992151.wav",
|
| 69 |
+
"key": "SODA_PROCESSED--train--992151",
|
| 70 |
+
"model_output": "No significant overlaps found."
|
| 71 |
+
},
|
| 72 |
+
{
|
| 73 |
+
"audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--744708.wav",
|
| 74 |
+
"key": "SODA_PROCESSED--train--744708",
|
| 75 |
+
"model_output": "No significant overlaps found."
|
| 76 |
+
},
|
| 77 |
+
{
|
| 78 |
+
"audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--11862.wav",
|
| 79 |
+
"key": "SODA_PROCESSED--train--11862",
|
| 80 |
+
"model_output": "No significant overlaps found."
|
| 81 |
+
},
|
| 82 |
+
{
|
| 83 |
+
"audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--419304.wav",
|
| 84 |
+
"key": "SODA_PROCESSED--train--419304",
|
| 85 |
+
"model_output": "No significant overlaps found."
|
| 86 |
+
},
|
| 87 |
+
{
|
| 88 |
+
"audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--98673.wav",
|
| 89 |
+
"key": "SODA_PROCESSED--train--98673",
|
| 90 |
+
"model_output": "No significant overlaps found."
|
| 91 |
+
},
|
| 92 |
+
{
|
| 93 |
+
"audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--22719.wav",
|
| 94 |
+
"key": "SODA_PROCESSED--train--22719",
|
| 95 |
+
"model_output": "No significant overlaps found."
|
| 96 |
+
},
|
| 97 |
+
{
|
| 98 |
+
"audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--1028263.wav",
|
| 99 |
+
"key": "SODA_PROCESSED--train--1028263",
|
| 100 |
+
"model_output": "No significant overlaps found."
|
| 101 |
+
},
|
| 102 |
+
{
|
| 103 |
+
"audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--848051.wav",
|
| 104 |
+
"key": "SODA_PROCESSED--train--848051",
|
| 105 |
+
"model_output": "No significant overlaps found."
|
| 106 |
+
},
|
| 107 |
+
{
|
| 108 |
+
"audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--511668.wav",
|
| 109 |
+
"key": "SODA_PROCESSED--train--511668",
|
| 110 |
+
"model_output": "No significant overlaps found."
|
| 111 |
+
},
|
| 112 |
+
{
|
| 113 |
+
"audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--12047.wav",
|
| 114 |
+
"key": "SODA_PROCESSED--train--12047",
|
| 115 |
+
"model_output": "No significant overlaps found."
|
| 116 |
+
},
|
| 117 |
+
{
|
| 118 |
+
"audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--153751.wav",
|
| 119 |
+
"key": "SODA_PROCESSED--train--153751",
|
| 120 |
+
"model_output": "No significant overlaps found."
|
| 121 |
+
},
|
| 122 |
+
{
|
| 123 |
+
"audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--795559.wav",
|
| 124 |
+
"key": "SODA_PROCESSED--train--795559",
|
| 125 |
+
"model_output": "No significant overlaps found."
|
| 126 |
+
},
|
| 127 |
+
{
|
| 128 |
+
"audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--387024.wav",
|
| 129 |
+
"key": "SODA_PROCESSED--train--387024",
|
| 130 |
+
"model_output": "No significant overlaps found."
|
| 131 |
+
},
|
| 132 |
+
{
|
| 133 |
+
"audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--1168213.wav",
|
| 134 |
+
"key": "SODA_PROCESSED--train--1168213",
|
| 135 |
+
"model_output": "No significant overlaps found."
|
| 136 |
+
},
|
| 137 |
+
{
|
| 138 |
+
"audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--1123711.wav",
|
| 139 |
+
"key": "SODA_PROCESSED--train--1123711",
|
| 140 |
+
"model_output": "No significant overlaps found."
|
| 141 |
+
},
|
| 142 |
+
{
|
| 143 |
+
"audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--819618.wav",
|
| 144 |
+
"key": "SODA_PROCESSED--train--819618",
|
| 145 |
+
"model_output": "No significant overlaps found."
|
| 146 |
+
},
|
| 147 |
+
{
|
| 148 |
+
"audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--752118.wav",
|
| 149 |
+
"key": "SODA_PROCESSED--train--752118",
|
| 150 |
+
"model_output": "No significant overlaps found."
|
| 151 |
+
},
|
| 152 |
+
{
|
| 153 |
+
"audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--581770.wav",
|
| 154 |
+
"key": "SODA_PROCESSED--train--581770",
|
| 155 |
+
"model_output": "No significant overlaps found."
|
| 156 |
+
},
|
| 157 |
+
{
|
| 158 |
+
"audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--276032.wav",
|
| 159 |
+
"key": "SODA_PROCESSED--train--276032",
|
| 160 |
+
"model_output": "No significant overlaps found."
|
| 161 |
+
},
|
| 162 |
+
{
|
| 163 |
+
"audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--556475.wav",
|
| 164 |
+
"key": "SODA_PROCESSED--train--556475",
|
| 165 |
+
"model_output": "No significant overlaps found."
|
| 166 |
+
},
|
| 167 |
+
{
|
| 168 |
+
"audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--674667.wav",
|
| 169 |
+
"key": "SODA_PROCESSED--train--674667",
|
| 170 |
+
"model_output": "No significant overlaps found."
|
| 171 |
+
},
|
| 172 |
+
{
|
| 173 |
+
"audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--501206.wav",
|
| 174 |
+
"key": "SODA_PROCESSED--train--501206",
|
| 175 |
+
"model_output": "No significant overlaps found."
|
| 176 |
+
},
|
| 177 |
+
{
|
| 178 |
+
"audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--905725.wav",
|
| 179 |
+
"key": "SODA_PROCESSED--train--905725",
|
| 180 |
+
"model_output": "No significant overlaps found."
|
| 181 |
+
},
|
| 182 |
+
{
|
| 183 |
+
"audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--265829.wav",
|
| 184 |
+
"key": "SODA_PROCESSED--train--265829",
|
| 185 |
+
"model_output": "No significant overlaps found."
|
| 186 |
+
},
|
| 187 |
+
{
|
| 188 |
+
"audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--207527.wav",
|
| 189 |
+
"key": "SODA_PROCESSED--train--207527",
|
| 190 |
+
"model_output": "No significant overlaps found."
|
| 191 |
+
},
|
| 192 |
+
{
|
| 193 |
+
"audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--985415.wav",
|
| 194 |
+
"key": "SODA_PROCESSED--train--985415",
|
| 195 |
+
"model_output": "No significant overlaps found."
|
| 196 |
+
},
|
| 197 |
+
{
|
| 198 |
+
"audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--115102.wav",
|
| 199 |
+
"key": "SODA_PROCESSED--train--115102",
|
| 200 |
+
"model_output": "No significant overlaps found."
|
| 201 |
+
},
|
| 202 |
+
{
|
| 203 |
+
"audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--8820.wav",
|
| 204 |
+
"key": "SODA_PROCESSED--train--8820",
|
| 205 |
+
"model_output": "No significant overlaps found."
|
| 206 |
+
},
|
| 207 |
+
{
|
| 208 |
+
"audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--453454.wav",
|
| 209 |
+
"key": "SODA_PROCESSED--train--453454",
|
| 210 |
+
"model_output": "No significant overlaps found."
|
| 211 |
+
},
|
| 212 |
+
{
|
| 213 |
+
"audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--375003.wav",
|
| 214 |
+
"key": "SODA_PROCESSED--train--375003",
|
| 215 |
+
"model_output": "No significant overlaps found."
|
| 216 |
+
},
|
| 217 |
+
{
|
| 218 |
+
"audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--757426.wav",
|
| 219 |
+
"key": "SODA_PROCESSED--train--757426",
|
| 220 |
+
"model_output": "No significant overlaps found."
|
| 221 |
+
},
|
| 222 |
+
{
|
| 223 |
+
"audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--202914.wav",
|
| 224 |
+
"key": "SODA_PROCESSED--train--202914",
|
| 225 |
+
"model_output": "No significant overlaps found."
|
| 226 |
+
},
|
| 227 |
+
{
|
| 228 |
+
"audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--1007416.wav",
|
| 229 |
+
"key": "SODA_PROCESSED--train--1007416",
|
| 230 |
+
"model_output": "No significant overlaps found."
|
| 231 |
+
},
|
| 232 |
+
{
|
| 233 |
+
"audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--193891.wav",
|
| 234 |
+
"key": "SODA_PROCESSED--train--193891",
|
| 235 |
+
"model_output": "No significant overlaps found."
|
| 236 |
+
},
|
| 237 |
+
{
|
| 238 |
+
"audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--96343.wav",
|
| 239 |
+
"key": "SODA_PROCESSED--train--96343",
|
| 240 |
+
"model_output": "No significant overlaps found."
|
| 241 |
+
},
|
| 242 |
+
{
|
| 243 |
+
"audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--1031234.wav",
|
| 244 |
+
"key": "SODA_PROCESSED--train--1031234",
|
| 245 |
+
"model_output": "No significant overlaps found."
|
| 246 |
+
},
|
| 247 |
+
{
|
| 248 |
+
"audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--798455.wav",
|
| 249 |
+
"key": "SODA_PROCESSED--train--798455",
|
| 250 |
+
"model_output": "No significant overlaps found."
|
| 251 |
+
},
|
| 252 |
+
{
|
| 253 |
+
"audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--438636.wav",
|
| 254 |
+
"key": "SODA_PROCESSED--train--438636",
|
| 255 |
+
"model_output": "No significant overlaps found."
|
| 256 |
+
},
|
| 257 |
+
{
|
| 258 |
+
"audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--776766.wav",
|
| 259 |
+
"key": "SODA_PROCESSED--train--776766",
|
| 260 |
+
"model_output": "No significant overlaps found."
|
| 261 |
+
},
|
| 262 |
+
{
|
| 263 |
+
"audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--691830.wav",
|
| 264 |
+
"key": "SODA_PROCESSED--train--691830",
|
| 265 |
+
"model_output": "No significant overlaps found."
|
| 266 |
+
},
|
| 267 |
+
{
|
| 268 |
+
"audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--524306.wav",
|
| 269 |
+
"key": "SODA_PROCESSED--train--524306",
|
| 270 |
+
"model_output": "No significant overlaps found."
|
| 271 |
+
},
|
| 272 |
+
{
|
| 273 |
+
"audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--471264.wav",
|
| 274 |
+
"key": "SODA_PROCESSED--train--471264",
|
| 275 |
+
"model_output": "No significant overlaps found."
|
| 276 |
+
},
|
| 277 |
+
{
|
| 278 |
+
"audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--421778.wav",
|
| 279 |
+
"key": "SODA_PROCESSED--train--421778",
|
| 280 |
+
"model_output": "No significant overlaps found."
|
| 281 |
+
},
|
| 282 |
+
{
|
| 283 |
+
"audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--541347.wav",
|
| 284 |
+
"key": "SODA_PROCESSED--train--541347",
|
| 285 |
+
"model_output": "No significant overlaps found."
|
| 286 |
+
},
|
| 287 |
+
{
|
| 288 |
+
"audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--1084325.wav",
|
| 289 |
+
"key": "SODA_PROCESSED--train--1084325",
|
| 290 |
+
"model_output": "No significant overlaps found."
|
| 291 |
+
},
|
| 292 |
+
{
|
| 293 |
+
"audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--29039.wav",
|
| 294 |
+
"key": "SODA_PROCESSED--train--29039",
|
| 295 |
+
"model_output": "No significant overlaps found."
|
| 296 |
+
},
|
| 297 |
+
{
|
| 298 |
+
"audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--1182464.wav",
|
| 299 |
+
"key": "SODA_PROCESSED--train--1182464",
|
| 300 |
+
"model_output": "No significant overlaps found."
|
| 301 |
+
},
|
| 302 |
+
{
|
| 303 |
+
"audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--735517.wav",
|
| 304 |
+
"key": "SODA_PROCESSED--train--735517",
|
| 305 |
+
"model_output": "No significant overlaps found."
|
| 306 |
+
},
|
| 307 |
+
{
|
| 308 |
+
"audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--417260.wav",
|
| 309 |
+
"key": "SODA_PROCESSED--train--417260",
|
| 310 |
+
"model_output": "No significant overlaps found."
|
| 311 |
+
},
|
| 312 |
+
{
|
| 313 |
+
"audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--784738.wav",
|
| 314 |
+
"key": "SODA_PROCESSED--train--784738",
|
| 315 |
+
"model_output": "No significant overlaps found."
|
| 316 |
+
},
|
| 317 |
+
{
|
| 318 |
+
"audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--303363.wav",
|
| 319 |
+
"key": "SODA_PROCESSED--train--303363",
|
| 320 |
+
"model_output": "No significant overlaps found."
|
| 321 |
+
},
|
| 322 |
+
{
|
| 323 |
+
"audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--795181.wav",
|
| 324 |
+
"key": "SODA_PROCESSED--train--795181",
|
| 325 |
+
"model_output": "No significant overlaps found."
|
| 326 |
+
},
|
| 327 |
+
{
|
| 328 |
+
"audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--33760.wav",
|
| 329 |
+
"key": "SODA_PROCESSED--train--33760",
|
| 330 |
+
"model_output": "No significant overlaps found."
|
| 331 |
+
},
|
| 332 |
+
{
|
| 333 |
+
"audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--126878.wav",
|
| 334 |
+
"key": "SODA_PROCESSED--train--126878",
|
| 335 |
+
"model_output": "No significant overlaps found."
|
| 336 |
+
},
|
| 337 |
+
{
|
| 338 |
+
"audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--317167.wav",
|
| 339 |
+
"key": "SODA_PROCESSED--train--317167",
|
| 340 |
+
"model_output": "No significant overlaps found."
|
| 341 |
+
},
|
| 342 |
+
{
|
| 343 |
+
"audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--463322.wav",
|
| 344 |
+
"key": "SODA_PROCESSED--train--463322",
|
| 345 |
+
"model_output": "No significant overlaps found."
|
| 346 |
+
},
|
| 347 |
+
{
|
| 348 |
+
"audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--51285.wav",
|
| 349 |
+
"key": "SODA_PROCESSED--train--51285",
|
| 350 |
+
"model_output": "No significant overlaps found."
|
| 351 |
+
},
|
| 352 |
+
{
|
| 353 |
+
"audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--1081079.wav",
|
| 354 |
+
"key": "SODA_PROCESSED--train--1081079",
|
| 355 |
+
"model_output": "No significant overlaps found."
|
| 356 |
+
},
|
| 357 |
+
{
|
| 358 |
+
"audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--58199.wav",
|
| 359 |
+
"key": "SODA_PROCESSED--train--58199",
|
| 360 |
+
"model_output": "No significant overlaps found."
|
| 361 |
+
},
|
| 362 |
+
{
|
| 363 |
+
"audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--1017701.wav",
|
| 364 |
+
"key": "SODA_PROCESSED--train--1017701",
|
| 365 |
+
"model_output": "No significant overlaps found."
|
| 366 |
+
},
|
| 367 |
+
{
|
| 368 |
+
"audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--762267.wav",
|
| 369 |
+
"key": "SODA_PROCESSED--train--762267",
|
| 370 |
+
"model_output": "No significant overlaps found."
|
| 371 |
+
},
|
| 372 |
+
{
|
| 373 |
+
"audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--4948.wav",
|
| 374 |
+
"key": "SODA_PROCESSED--train--4948",
|
| 375 |
+
"model_output": "No significant overlaps found."
|
| 376 |
+
},
|
| 377 |
+
{
|
| 378 |
+
"audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--737676.wav",
|
| 379 |
+
"key": "SODA_PROCESSED--train--737676",
|
| 380 |
+
"model_output": "No significant overlaps found."
|
| 381 |
+
},
|
| 382 |
+
{
|
| 383 |
+
"audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--606362.wav",
|
| 384 |
+
"key": "SODA_PROCESSED--train--606362",
|
| 385 |
+
"model_output": "No significant overlaps found."
|
| 386 |
+
},
|
| 387 |
+
{
|
| 388 |
+
"audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--674832.wav",
|
| 389 |
+
"key": "SODA_PROCESSED--train--674832",
|
| 390 |
+
"model_output": "No significant overlaps found."
|
| 391 |
+
},
|
| 392 |
+
{
|
| 393 |
+
"audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--588465.wav",
|
| 394 |
+
"key": "SODA_PROCESSED--train--588465",
|
| 395 |
+
"model_output": "No significant overlaps found."
|
| 396 |
+
},
|
| 397 |
+
{
|
| 398 |
+
"audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--386163.wav",
|
| 399 |
+
"key": "SODA_PROCESSED--train--386163",
|
| 400 |
+
"model_output": "No significant overlaps found."
|
| 401 |
+
},
|
| 402 |
+
{
|
| 403 |
+
"audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--421624.wav",
|
| 404 |
+
"key": "SODA_PROCESSED--train--421624",
|
| 405 |
+
"model_output": "No significant overlaps found."
|
| 406 |
+
},
|
| 407 |
+
{
|
| 408 |
+
"audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--977126.wav",
|
| 409 |
+
"key": "SODA_PROCESSED--train--977126",
|
| 410 |
+
"model_output": "No significant overlaps found."
|
| 411 |
+
},
|
| 412 |
+
{
|
| 413 |
+
"audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--932676.wav",
|
| 414 |
+
"key": "SODA_PROCESSED--train--932676",
|
| 415 |
+
"model_output": "No significant overlaps found."
|
| 416 |
+
},
|
| 417 |
+
{
|
| 418 |
+
"audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--315768.wav",
|
| 419 |
+
"key": "SODA_PROCESSED--train--315768",
|
| 420 |
+
"model_output": "No significant overlaps found."
|
| 421 |
+
},
|
| 422 |
+
{
|
| 423 |
+
"audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--939669.wav",
|
| 424 |
+
"key": "SODA_PROCESSED--train--939669",
|
| 425 |
+
"model_output": "No significant overlaps found."
|
| 426 |
+
},
|
| 427 |
+
{
|
| 428 |
+
"audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--1174912.wav",
|
| 429 |
+
"key": "SODA_PROCESSED--train--1174912",
|
| 430 |
+
"model_output": "No significant overlaps found."
|
| 431 |
+
},
|
| 432 |
+
{
|
| 433 |
+
"audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--1023331.wav",
|
| 434 |
+
"key": "SODA_PROCESSED--train--1023331",
|
| 435 |
+
"model_output": "No significant overlaps found."
|
| 436 |
+
},
|
| 437 |
+
{
|
| 438 |
+
"audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--144310.wav",
|
| 439 |
+
"key": "SODA_PROCESSED--train--144310",
|
| 440 |
+
"model_output": "No significant overlaps found."
|
| 441 |
+
},
|
| 442 |
+
{
|
| 443 |
+
"audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--1011922.wav",
|
| 444 |
+
"key": "SODA_PROCESSED--train--1011922",
|
| 445 |
+
"model_output": "No significant overlaps found."
|
| 446 |
+
},
|
| 447 |
+
{
|
| 448 |
+
"audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--713730.wav",
|
| 449 |
+
"key": "SODA_PROCESSED--train--713730",
|
| 450 |
+
"model_output": "No significant overlaps found."
|
| 451 |
+
},
|
| 452 |
+
{
|
| 453 |
+
"audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--708040.wav",
|
| 454 |
+
"key": "SODA_PROCESSED--train--708040",
|
| 455 |
+
"model_output": "No significant overlaps found."
|
| 456 |
+
},
|
| 457 |
+
{
|
| 458 |
+
"audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--860576.wav",
|
| 459 |
+
"key": "SODA_PROCESSED--train--860576",
|
| 460 |
+
"model_output": "No significant overlaps found."
|
| 461 |
+
},
|
| 462 |
+
{
|
| 463 |
+
"audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--1001007.wav",
|
| 464 |
+
"key": "SODA_PROCESSED--train--1001007",
|
| 465 |
+
"model_output": "No significant overlaps found."
|
| 466 |
+
},
|
| 467 |
+
{
|
| 468 |
+
"audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--1166623.wav",
|
| 469 |
+
"key": "SODA_PROCESSED--train--1166623",
|
| 470 |
+
"model_output": "No significant overlaps found."
|
| 471 |
+
},
|
| 472 |
+
{
|
| 473 |
+
"audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--372789.wav",
|
| 474 |
+
"key": "SODA_PROCESSED--train--372789",
|
| 475 |
+
"model_output": "No significant overlaps found."
|
| 476 |
+
},
|
| 477 |
+
{
|
| 478 |
+
"audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--468603.wav",
|
| 479 |
+
"key": "SODA_PROCESSED--train--468603",
|
| 480 |
+
"model_output": "No significant overlaps found."
|
| 481 |
+
},
|
| 482 |
+
{
|
| 483 |
+
"audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--233562.wav",
|
| 484 |
+
"key": "SODA_PROCESSED--train--233562",
|
| 485 |
+
"model_output": "No significant overlaps found."
|
| 486 |
+
},
|
| 487 |
+
{
|
| 488 |
+
"audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--476626.wav",
|
| 489 |
+
"key": "SODA_PROCESSED--train--476626",
|
| 490 |
+
"model_output": "No significant overlaps found."
|
| 491 |
+
},
|
| 492 |
+
{
|
| 493 |
+
"audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--49462.wav",
|
| 494 |
+
"key": "SODA_PROCESSED--train--49462",
|
| 495 |
+
"model_output": "No significant overlaps found."
|
| 496 |
+
},
|
| 497 |
+
{
|
| 498 |
+
"audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--303336.wav",
|
| 499 |
+
"key": "SODA_PROCESSED--train--303336",
|
| 500 |
+
"model_output": "No significant overlaps found."
|
| 501 |
+
},
|
| 502 |
+
{
|
| 503 |
+
"audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--221358.wav",
|
| 504 |
+
"key": "SODA_PROCESSED--train--221358",
|
| 505 |
+
"model_output": "No significant overlaps found."
|
| 506 |
+
},
|
| 507 |
+
{
|
| 508 |
+
"audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--843615.wav",
|
| 509 |
+
"key": "SODA_PROCESSED--train--843615",
|
| 510 |
+
"model_output": "No significant overlaps found."
|
| 511 |
+
},
|
| 512 |
+
{
|
| 513 |
+
"audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--873625.wav",
|
| 514 |
+
"key": "SODA_PROCESSED--train--873625",
|
| 515 |
+
"model_output": "No significant overlaps found."
|
| 516 |
+
},
|
| 517 |
+
{
|
| 518 |
+
"audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--4814.wav",
|
| 519 |
+
"key": "SODA_PROCESSED--train--4814",
|
| 520 |
+
"model_output": "No significant overlaps found."
|
| 521 |
+
},
|
| 522 |
+
{
|
| 523 |
+
"audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--280675.wav",
|
| 524 |
+
"key": "SODA_PROCESSED--train--280675",
|
| 525 |
+
"model_output": "No significant overlaps found."
|
| 526 |
+
},
|
| 527 |
+
{
|
| 528 |
+
"audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--1132437.wav",
|
| 529 |
+
"key": "SODA_PROCESSED--train--1132437",
|
| 530 |
+
"model_output": "No significant overlaps found."
|
| 531 |
+
},
|
| 532 |
+
{
|
| 533 |
+
"audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--650705.wav",
|
| 534 |
+
"key": "SODA_PROCESSED--train--650705",
|
| 535 |
+
"model_output": "No significant overlaps found."
|
| 536 |
+
},
|
| 537 |
+
{
|
| 538 |
+
"audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--1186756.wav",
|
| 539 |
+
"key": "SODA_PROCESSED--train--1186756",
|
| 540 |
+
"model_output": "No significant overlaps found."
|
| 541 |
+
},
|
| 542 |
+
{
|
| 543 |
+
"audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--108309.wav",
|
| 544 |
+
"key": "SODA_PROCESSED--train--108309",
|
| 545 |
+
"model_output": "No significant overlaps found."
|
| 546 |
+
},
|
| 547 |
+
{
|
| 548 |
+
"audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--82238.wav",
|
| 549 |
+
"key": "SODA_PROCESSED--train--82238",
|
| 550 |
+
"model_output": "No significant overlaps found."
|
| 551 |
+
},
|
| 552 |
+
{
|
| 553 |
+
"audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--616846.wav",
|
| 554 |
+
"key": "SODA_PROCESSED--train--616846",
|
| 555 |
+
"model_output": "No significant overlaps found."
|
| 556 |
+
},
|
| 557 |
+
{
|
| 558 |
+
"audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--61606.wav",
|
| 559 |
+
"key": "SODA_PROCESSED--train--61606",
|
| 560 |
+
"model_output": "No significant overlaps found."
|
| 561 |
+
},
|
| 562 |
+
{
|
| 563 |
+
"audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--370577.wav",
|
| 564 |
+
"key": "SODA_PROCESSED--train--370577",
|
| 565 |
+
"model_output": "No significant overlaps found."
|
| 566 |
+
},
|
| 567 |
+
{
|
| 568 |
+
"audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--69581.wav",
|
| 569 |
+
"key": "SODA_PROCESSED--train--69581",
|
| 570 |
+
"model_output": "No significant overlaps found."
|
| 571 |
+
},
|
| 572 |
+
{
|
| 573 |
+
"audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--180962.wav",
|
| 574 |
+
"key": "SODA_PROCESSED--train--180962",
|
| 575 |
+
"model_output": "No significant overlaps found."
|
| 576 |
+
},
|
| 577 |
+
{
|
| 578 |
+
"audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--578986.wav",
|
| 579 |
+
"key": "SODA_PROCESSED--train--578986",
|
| 580 |
+
"model_output": "No significant overlaps found."
|
| 581 |
+
},
|
| 582 |
+
{
|
| 583 |
+
"audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--132857.wav",
|
| 584 |
+
"key": "SODA_PROCESSED--train--132857",
|
| 585 |
+
"model_output": "No significant overlaps found."
|
| 586 |
+
},
|
| 587 |
+
{
|
| 588 |
+
"audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--188417.wav",
|
| 589 |
+
"key": "SODA_PROCESSED--train--188417",
|
| 590 |
+
"model_output": "No significant overlaps found."
|
| 591 |
+
},
|
| 592 |
+
{
|
| 593 |
+
"audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--771154.wav",
|
| 594 |
+
"key": "SODA_PROCESSED--train--771154",
|
| 595 |
+
"model_output": "No significant overlaps found."
|
| 596 |
+
},
|
| 597 |
+
{
|
| 598 |
+
"audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--720445.wav",
|
| 599 |
+
"key": "SODA_PROCESSED--train--720445",
|
| 600 |
+
"model_output": "No significant overlaps found."
|
| 601 |
+
},
|
| 602 |
+
{
|
| 603 |
+
"audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--514225.wav",
|
| 604 |
+
"key": "SODA_PROCESSED--train--514225",
|
| 605 |
+
"model_output": "No significant overlaps found."
|
| 606 |
+
},
|
| 607 |
+
{
|
| 608 |
+
"audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--815822.wav",
|
| 609 |
+
"key": "SODA_PROCESSED--train--815822",
|
| 610 |
+
"model_output": "No significant overlaps found."
|
| 611 |
+
},
|
| 612 |
+
{
|
| 613 |
+
"audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--761001.wav",
|
| 614 |
+
"key": "SODA_PROCESSED--train--761001",
|
| 615 |
+
"model_output": "No significant overlaps found."
|
| 616 |
+
},
|
| 617 |
+
{
|
| 618 |
+
"audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--1061857.wav",
|
| 619 |
+
"key": "SODA_PROCESSED--train--1061857",
|
| 620 |
+
"model_output": "No significant overlaps found."
|
| 621 |
+
},
|
| 622 |
+
{
|
| 623 |
+
"audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--475793.wav",
|
| 624 |
+
"key": "SODA_PROCESSED--train--475793",
|
| 625 |
+
"model_output": "No significant overlaps found."
|
| 626 |
+
},
|
| 627 |
+
{
|
| 628 |
+
"audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--406352.wav",
|
| 629 |
+
"key": "SODA_PROCESSED--train--406352",
|
| 630 |
+
"model_output": "No significant overlaps found."
|
| 631 |
+
},
|
| 632 |
+
{
|
| 633 |
+
"audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--486716.wav",
|
| 634 |
+
"key": "SODA_PROCESSED--train--486716",
|
| 635 |
+
"model_output": "No significant overlaps found."
|
| 636 |
+
},
|
| 637 |
+
{
|
| 638 |
+
"audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--468879.wav",
|
| 639 |
+
"key": "SODA_PROCESSED--train--468879",
|
| 640 |
+
"model_output": "No significant overlaps found."
|
| 641 |
+
},
|
| 642 |
+
{
|
| 643 |
+
"audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--338832.wav",
|
| 644 |
+
"key": "SODA_PROCESSED--train--338832",
|
| 645 |
+
"model_output": "No significant overlaps found."
|
| 646 |
+
},
|
| 647 |
+
{
|
| 648 |
+
"audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--845126.wav",
|
| 649 |
+
"key": "SODA_PROCESSED--train--845126",
|
| 650 |
+
"model_output": "No significant overlaps found."
|
| 651 |
+
},
|
| 652 |
+
{
|
| 653 |
+
"audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--490986.wav",
|
| 654 |
+
"key": "SODA_PROCESSED--train--490986",
|
| 655 |
+
"model_output": "No significant overlaps found."
|
| 656 |
+
},
|
| 657 |
+
{
|
| 658 |
+
"audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--1128813.wav",
|
| 659 |
+
"key": "SODA_PROCESSED--train--1128813",
|
| 660 |
+
"model_output": "No significant overlaps found."
|
| 661 |
+
},
|
| 662 |
+
{
|
| 663 |
+
"audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--193134.wav",
|
| 664 |
+
"key": "SODA_PROCESSED--train--193134",
|
| 665 |
+
"model_output": "No significant overlaps found."
|
| 666 |
+
},
|
| 667 |
+
{
|
| 668 |
+
"audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--258235.wav",
|
| 669 |
+
"key": "SODA_PROCESSED--train--258235",
|
| 670 |
+
"model_output": "No significant overlaps found."
|
| 671 |
+
},
|
| 672 |
+
{
|
| 673 |
+
"audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--895260.wav",
|
| 674 |
+
"key": "SODA_PROCESSED--train--895260",
|
| 675 |
+
"model_output": "No significant overlaps found."
|
| 676 |
+
},
|
| 677 |
+
{
|
| 678 |
+
"audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--119322.wav",
|
| 679 |
+
"key": "SODA_PROCESSED--train--119322",
|
| 680 |
+
"model_output": "No significant overlaps found."
|
| 681 |
+
},
|
| 682 |
+
{
|
| 683 |
+
"audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--413405.wav",
|
| 684 |
+
"key": "SODA_PROCESSED--train--413405",
|
| 685 |
+
"model_output": "No significant overlaps found."
|
| 686 |
+
},
|
| 687 |
+
{
|
| 688 |
+
"audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--346041.wav",
|
| 689 |
+
"key": "SODA_PROCESSED--train--346041",
|
| 690 |
+
"model_output": "No significant overlaps found."
|
| 691 |
+
},
|
| 692 |
+
{
|
| 693 |
+
"audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--718092.wav",
|
| 694 |
+
"key": "SODA_PROCESSED--train--718092",
|
| 695 |
+
"model_output": "No significant overlaps found."
|
| 696 |
+
},
|
| 697 |
+
{
|
| 698 |
+
"audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--218634.wav",
|
| 699 |
+
"key": "SODA_PROCESSED--train--218634",
|
| 700 |
+
"model_output": "No significant overlaps found."
|
| 701 |
+
},
|
| 702 |
+
{
|
| 703 |
+
"audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--835488.wav",
|
| 704 |
+
"key": "SODA_PROCESSED--train--835488",
|
| 705 |
+
"model_output": "No significant overlaps found."
|
| 706 |
+
},
|
| 707 |
+
{
|
| 708 |
+
"audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--113543.wav",
|
| 709 |
+
"key": "SODA_PROCESSED--train--113543",
|
| 710 |
+
"model_output": "No significant overlaps found."
|
| 711 |
+
},
|
| 712 |
+
{
|
| 713 |
+
"audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--869455.wav",
|
| 714 |
+
"key": "SODA_PROCESSED--train--869455",
|
| 715 |
+
"model_output": "No significant overlaps found."
|
| 716 |
+
},
|
| 717 |
+
{
|
| 718 |
+
"audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--330048.wav",
|
| 719 |
+
"key": "SODA_PROCESSED--train--330048",
|
| 720 |
+
"model_output": "No significant overlaps found."
|
| 721 |
+
},
|
| 722 |
+
{
|
| 723 |
+
"audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--766234.wav",
|
| 724 |
+
"key": "SODA_PROCESSED--train--766234",
|
| 725 |
+
"model_output": "No significant overlaps found."
|
| 726 |
+
},
|
| 727 |
+
{
|
| 728 |
+
"audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--535368.wav",
|
| 729 |
+
"key": "SODA_PROCESSED--train--535368",
|
| 730 |
+
"model_output": "No significant overlaps found."
|
| 731 |
+
},
|
| 732 |
+
{
|
| 733 |
+
"audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--908444.wav",
|
| 734 |
+
"key": "SODA_PROCESSED--train--908444",
|
| 735 |
+
"model_output": "No significant overlaps found."
|
| 736 |
+
},
|
| 737 |
+
{
|
| 738 |
+
"audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--748910.wav",
|
| 739 |
+
"key": "SODA_PROCESSED--train--748910",
|
| 740 |
+
"model_output": "No significant overlaps found."
|
| 741 |
+
},
|
| 742 |
+
{
|
| 743 |
+
"audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--525710.wav",
|
| 744 |
+
"key": "SODA_PROCESSED--train--525710",
|
| 745 |
+
"model_output": "No significant overlaps found."
|
| 746 |
+
},
|
| 747 |
+
{
|
| 748 |
+
"audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--399572.wav",
|
| 749 |
+
"key": "SODA_PROCESSED--train--399572",
|
| 750 |
+
"model_output": "No significant overlaps found."
|
| 751 |
+
},
|
| 752 |
+
{
|
| 753 |
+
"audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--737726.wav",
|
| 754 |
+
"key": "SODA_PROCESSED--train--737726",
|
| 755 |
+
"model_output": "No significant overlaps found."
|
| 756 |
+
},
|
| 757 |
+
{
|
| 758 |
+
"audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--44625.wav",
|
| 759 |
+
"key": "SODA_PROCESSED--train--44625",
|
| 760 |
+
"model_output": "No significant overlaps found."
|
| 761 |
+
},
|
| 762 |
+
{
|
| 763 |
+
"audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--1095086.wav",
|
| 764 |
+
"key": "SODA_PROCESSED--train--1095086",
|
| 765 |
+
"model_output": "No significant overlaps found."
|
| 766 |
+
},
|
| 767 |
+
{
|
| 768 |
+
"audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--269886.wav",
|
| 769 |
+
"key": "SODA_PROCESSED--train--269886",
|
| 770 |
+
"model_output": "No significant overlaps found."
|
| 771 |
+
},
|
| 772 |
+
{
|
| 773 |
+
"audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--596068.wav",
|
| 774 |
+
"key": "SODA_PROCESSED--train--596068",
|
| 775 |
+
"model_output": "No significant overlaps found."
|
| 776 |
+
},
|
| 777 |
+
{
|
| 778 |
+
"audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--16779.wav",
|
| 779 |
+
"key": "SODA_PROCESSED--train--16779",
|
| 780 |
+
"model_output": "No significant overlaps found."
|
| 781 |
+
},
|
| 782 |
+
{
|
| 783 |
+
"audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--532510.wav",
|
| 784 |
+
"key": "SODA_PROCESSED--train--532510",
|
| 785 |
+
"model_output": "No significant overlaps found."
|
| 786 |
+
},
|
| 787 |
+
{
|
| 788 |
+
"audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--68508.wav",
|
| 789 |
+
"key": "SODA_PROCESSED--train--68508",
|
| 790 |
+
"model_output": "No significant overlaps found."
|
| 791 |
+
},
|
| 792 |
+
{
|
| 793 |
+
"audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--162106.wav",
|
| 794 |
+
"key": "SODA_PROCESSED--train--162106",
|
| 795 |
+
"model_output": "No significant overlaps found."
|
| 796 |
+
},
|
| 797 |
+
{
|
| 798 |
+
"audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--831005.wav",
|
| 799 |
+
"key": "SODA_PROCESSED--train--831005",
|
| 800 |
+
"model_output": "No significant overlaps found."
|
| 801 |
+
},
|
| 802 |
+
{
|
| 803 |
+
"audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--509788.wav",
|
| 804 |
+
"key": "SODA_PROCESSED--train--509788",
|
| 805 |
+
"model_output": "No significant overlaps found."
|
| 806 |
+
},
|
| 807 |
+
{
|
| 808 |
+
"audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--489519.wav",
|
| 809 |
+
"key": "SODA_PROCESSED--train--489519",
|
| 810 |
+
"model_output": "No significant overlaps found."
|
| 811 |
+
},
|
| 812 |
+
{
|
| 813 |
+
"audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--1020087.wav",
|
| 814 |
+
"key": "SODA_PROCESSED--train--1020087",
|
| 815 |
+
"model_output": "No significant overlaps found."
|
| 816 |
+
},
|
| 817 |
+
{
|
| 818 |
+
"audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--1050427.wav",
|
| 819 |
+
"key": "SODA_PROCESSED--train--1050427",
|
| 820 |
+
"model_output": "No significant overlaps found."
|
| 821 |
+
},
|
| 822 |
+
{
|
| 823 |
+
"audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--842885.wav",
|
| 824 |
+
"key": "SODA_PROCESSED--train--842885",
|
| 825 |
+
"model_output": "No significant overlaps found."
|
| 826 |
+
},
|
| 827 |
+
{
|
| 828 |
+
"audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--166191.wav",
|
| 829 |
+
"key": "SODA_PROCESSED--train--166191",
|
| 830 |
+
"model_output": "No significant overlaps found."
|
| 831 |
+
},
|
| 832 |
+
{
|
| 833 |
+
"audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--826028.wav",
|
| 834 |
+
"key": "SODA_PROCESSED--train--826028",
|
| 835 |
+
"model_output": "No significant overlaps found."
|
| 836 |
+
},
|
| 837 |
+
{
|
| 838 |
+
"audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--715956.wav",
|
| 839 |
+
"key": "SODA_PROCESSED--train--715956",
|
| 840 |
+
"model_output": "No significant overlaps found."
|
| 841 |
+
},
|
| 842 |
+
{
|
| 843 |
+
"audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--967872.wav",
|
| 844 |
+
"key": "SODA_PROCESSED--train--967872",
|
| 845 |
+
"model_output": "No significant overlaps found."
|
| 846 |
+
},
|
| 847 |
+
{
|
| 848 |
+
"audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--277060.wav",
|
| 849 |
+
"key": "SODA_PROCESSED--train--277060",
|
| 850 |
+
"model_output": "No significant overlaps found."
|
| 851 |
+
},
|
| 852 |
+
{
|
| 853 |
+
"audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--803822.wav",
|
| 854 |
+
"key": "SODA_PROCESSED--train--803822",
|
| 855 |
+
"model_output": "No significant overlaps found."
|
| 856 |
+
},
|
| 857 |
+
{
|
| 858 |
+
"audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--928982.wav",
|
| 859 |
+
"key": "SODA_PROCESSED--train--928982",
|
| 860 |
+
"model_output": "No significant overlaps found."
|
| 861 |
+
},
|
| 862 |
+
{
|
| 863 |
+
"audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--371354.wav",
|
| 864 |
+
"key": "SODA_PROCESSED--train--371354",
|
| 865 |
+
"model_output": "No significant overlaps found."
|
| 866 |
+
},
|
| 867 |
+
{
|
| 868 |
+
"audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--12295.wav",
|
| 869 |
+
"key": "SODA_PROCESSED--train--12295",
|
| 870 |
+
"model_output": "No significant overlaps found."
|
| 871 |
+
},
|
| 872 |
+
{
|
| 873 |
+
"audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--1030451.wav",
|
| 874 |
+
"key": "SODA_PROCESSED--train--1030451",
|
| 875 |
+
"model_output": "No significant overlaps found."
|
| 876 |
+
},
|
| 877 |
+
{
|
| 878 |
+
"audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--168398.wav",
|
| 879 |
+
"key": "SODA_PROCESSED--train--168398",
|
| 880 |
+
"model_output": "No significant overlaps found."
|
| 881 |
+
},
|
| 882 |
+
{
|
| 883 |
+
"audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--556505.wav",
|
| 884 |
+
"key": "SODA_PROCESSED--train--556505",
|
| 885 |
+
"model_output": "No significant overlaps found."
|
| 886 |
+
},
|
| 887 |
+
{
|
| 888 |
+
"audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--123906.wav",
|
| 889 |
+
"key": "SODA_PROCESSED--train--123906",
|
| 890 |
+
"model_output": "No significant overlaps found."
|
| 891 |
+
},
|
| 892 |
+
{
|
| 893 |
+
"audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--1120331.wav",
|
| 894 |
+
"key": "SODA_PROCESSED--train--1120331",
|
| 895 |
+
"model_output": "No significant overlaps found."
|
| 896 |
+
},
|
| 897 |
+
{
|
| 898 |
+
"audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--121129.wav",
|
| 899 |
+
"key": "SODA_PROCESSED--train--121129",
|
| 900 |
+
"model_output": "No significant overlaps found."
|
| 901 |
+
},
|
| 902 |
+
{
|
| 903 |
+
"audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--690063.wav",
|
| 904 |
+
"key": "SODA_PROCESSED--train--690063",
|
| 905 |
+
"model_output": "No significant overlaps found."
|
| 906 |
+
},
|
| 907 |
+
{
|
| 908 |
+
"audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--334902.wav",
|
| 909 |
+
"key": "SODA_PROCESSED--train--334902",
|
| 910 |
+
"model_output": "No significant overlaps found."
|
| 911 |
+
},
|
| 912 |
+
{
|
| 913 |
+
"audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--442672.wav",
|
| 914 |
+
"key": "SODA_PROCESSED--train--442672",
|
| 915 |
+
"model_output": "No significant overlaps found."
|
| 916 |
+
},
|
| 917 |
+
{
|
| 918 |
+
"audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--640494.wav",
|
| 919 |
+
"key": "SODA_PROCESSED--train--640494",
|
| 920 |
+
"model_output": "No significant overlaps found."
|
| 921 |
+
},
|
| 922 |
+
{
|
| 923 |
+
"audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--171463.wav",
|
| 924 |
+
"key": "SODA_PROCESSED--train--171463",
|
| 925 |
+
"model_output": "No significant overlaps found."
|
| 926 |
+
},
|
| 927 |
+
{
|
| 928 |
+
"audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--565809.wav",
|
| 929 |
+
"key": "SODA_PROCESSED--train--565809",
|
| 930 |
+
"model_output": "No significant overlaps found."
|
| 931 |
+
},
|
| 932 |
+
{
|
| 933 |
+
"audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--329396.wav",
|
| 934 |
+
"key": "SODA_PROCESSED--train--329396",
|
| 935 |
+
"model_output": "No significant overlaps found."
|
| 936 |
+
},
|
| 937 |
+
{
|
| 938 |
+
"audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--1090942.wav",
|
| 939 |
+
"key": "SODA_PROCESSED--train--1090942",
|
| 940 |
+
"model_output": "No significant overlaps found."
|
| 941 |
+
},
|
| 942 |
+
{
|
| 943 |
+
"audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--980776.wav",
|
| 944 |
+
"key": "SODA_PROCESSED--train--980776",
|
| 945 |
+
"model_output": "No significant overlaps found."
|
| 946 |
+
},
|
| 947 |
+
{
|
| 948 |
+
"audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--29858.wav",
|
| 949 |
+
"key": "SODA_PROCESSED--train--29858",
|
| 950 |
+
"model_output": "No significant overlaps found."
|
| 951 |
+
},
|
| 952 |
+
{
|
| 953 |
+
"audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--596349.wav",
|
| 954 |
+
"key": "SODA_PROCESSED--train--596349",
|
| 955 |
+
"model_output": "No significant overlaps found."
|
| 956 |
+
},
|
| 957 |
+
{
|
| 958 |
+
"audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--604536.wav",
|
| 959 |
+
"key": "SODA_PROCESSED--train--604536",
|
| 960 |
+
"model_output": "No significant overlaps found."
|
| 961 |
+
},
|
| 962 |
+
{
|
| 963 |
+
"audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--500115.wav",
|
| 964 |
+
"key": "SODA_PROCESSED--train--500115",
|
| 965 |
+
"model_output": "No significant overlaps found."
|
| 966 |
+
},
|
| 967 |
+
{
|
| 968 |
+
"audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--605295.wav",
|
| 969 |
+
"key": "SODA_PROCESSED--train--605295",
|
| 970 |
+
"model_output": "No significant overlaps found."
|
| 971 |
+
},
|
| 972 |
+
{
|
| 973 |
+
"audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--310941.wav",
|
| 974 |
+
"key": "SODA_PROCESSED--train--310941",
|
| 975 |
+
"model_output": "No significant overlaps found."
|
| 976 |
+
},
|
| 977 |
+
{
|
| 978 |
+
"audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--1051089.wav",
|
| 979 |
+
"key": "SODA_PROCESSED--train--1051089",
|
| 980 |
+
"model_output": "No significant overlaps found."
|
| 981 |
+
},
|
| 982 |
+
{
|
| 983 |
+
"audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--187351.wav",
|
| 984 |
+
"key": "SODA_PROCESSED--train--187351",
|
| 985 |
+
"model_output": "No significant overlaps found."
|
| 986 |
+
},
|
| 987 |
+
{
|
| 988 |
+
"audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--645254.wav",
|
| 989 |
+
"key": "SODA_PROCESSED--train--645254",
|
| 990 |
+
"model_output": "No significant overlaps found."
|
| 991 |
+
},
|
| 992 |
+
{
|
| 993 |
+
"audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--1066203.wav",
|
| 994 |
+
"key": "SODA_PROCESSED--train--1066203",
|
| 995 |
+
"model_output": "No significant overlaps found."
|
| 996 |
+
},
|
| 997 |
+
{
|
| 998 |
+
"audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--913166.wav",
|
| 999 |
+
"key": "SODA_PROCESSED--train--913166",
|
| 1000 |
+
"model_output": "No significant overlaps found."
|
| 1001 |
+
},
|
| 1002 |
+
{
|
| 1003 |
+
"audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--583204.wav",
|
| 1004 |
+
"key": "SODA_PROCESSED--train--583204",
|
| 1005 |
+
"model_output": "No significant overlaps found."
|
| 1006 |
+
},
|
| 1007 |
+
{
|
| 1008 |
+
"audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--575640.wav",
|
| 1009 |
+
"key": "SODA_PROCESSED--train--575640",
|
| 1010 |
+
"model_output": "No significant overlaps found."
|
| 1011 |
+
},
|
| 1012 |
+
{
|
| 1013 |
+
"audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--109428.wav",
|
| 1014 |
+
"key": "SODA_PROCESSED--train--109428",
|
| 1015 |
+
"model_output": "No significant overlaps found."
|
| 1016 |
+
},
|
| 1017 |
+
{
|
| 1018 |
+
"audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--246434.wav",
|
| 1019 |
+
"key": "SODA_PROCESSED--train--246434",
|
| 1020 |
+
"model_output": "No significant overlaps found."
|
| 1021 |
+
},
|
| 1022 |
+
{
|
| 1023 |
+
"audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--977434.wav",
|
| 1024 |
+
"key": "SODA_PROCESSED--train--977434",
|
| 1025 |
+
"model_output": "No significant overlaps found."
|
| 1026 |
+
},
|
| 1027 |
+
{
|
| 1028 |
+
"audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--665430.wav",
|
| 1029 |
+
"key": "SODA_PROCESSED--train--665430",
|
| 1030 |
+
"model_output": "No significant overlaps found."
|
| 1031 |
+
},
|
| 1032 |
+
{
|
| 1033 |
+
"audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--960193.wav",
|
| 1034 |
+
"key": "SODA_PROCESSED--train--960193",
|
| 1035 |
+
"model_output": "No significant overlaps found."
|
| 1036 |
+
},
|
| 1037 |
+
{
|
| 1038 |
+
"audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--31287.wav",
|
| 1039 |
+
"key": "SODA_PROCESSED--train--31287",
|
| 1040 |
+
"model_output": "No significant overlaps found."
|
| 1041 |
+
},
|
| 1042 |
+
{
|
| 1043 |
+
"audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--254497.wav",
|
| 1044 |
+
"key": "SODA_PROCESSED--train--254497",
|
| 1045 |
+
"model_output": "No significant overlaps found."
|
| 1046 |
+
},
|
| 1047 |
+
{
|
| 1048 |
+
"audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--774546.wav",
|
| 1049 |
+
"key": "SODA_PROCESSED--train--774546",
|
| 1050 |
+
"model_output": "No significant overlaps found."
|
| 1051 |
+
},
|
| 1052 |
+
{
|
| 1053 |
+
"audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--273875.wav",
|
| 1054 |
+
"key": "SODA_PROCESSED--train--273875",
|
| 1055 |
+
"model_output": "No significant overlaps found."
|
| 1056 |
+
},
|
| 1057 |
+
{
|
| 1058 |
+
"audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--822773.wav",
|
| 1059 |
+
"key": "SODA_PROCESSED--train--822773",
|
| 1060 |
+
"model_output": "No significant overlaps found."
|
| 1061 |
+
},
|
| 1062 |
+
{
|
| 1063 |
+
"audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--1052554.wav",
|
| 1064 |
+
"key": "SODA_PROCESSED--train--1052554",
|
| 1065 |
+
"model_output": "No significant overlaps found."
|
| 1066 |
+
},
|
| 1067 |
+
{
|
| 1068 |
+
"audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--179972.wav",
|
| 1069 |
+
"key": "SODA_PROCESSED--train--179972",
|
| 1070 |
+
"model_output": "No significant overlaps found."
|
| 1071 |
+
},
|
| 1072 |
+
{
|
| 1073 |
+
"audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--1117467.wav",
|
| 1074 |
+
"key": "SODA_PROCESSED--train--1117467",
|
| 1075 |
+
"model_output": "No significant overlaps found."
|
| 1076 |
+
},
|
| 1077 |
+
{
|
| 1078 |
+
"audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--961025.wav",
|
| 1079 |
+
"key": "SODA_PROCESSED--train--961025",
|
| 1080 |
+
"model_output": "No significant overlaps found."
|
| 1081 |
+
},
|
| 1082 |
+
{
|
| 1083 |
+
"audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--923496.wav",
|
| 1084 |
+
"key": "SODA_PROCESSED--train--923496",
|
| 1085 |
+
"model_output": "No significant overlaps found."
|
| 1086 |
+
},
|
| 1087 |
+
{
|
| 1088 |
+
"audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--41171.wav",
|
| 1089 |
+
"key": "SODA_PROCESSED--train--41171",
|
| 1090 |
+
"model_output": "No significant overlaps found."
|
| 1091 |
+
},
|
| 1092 |
+
{
|
| 1093 |
+
"audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--679971.wav",
|
| 1094 |
+
"key": "SODA_PROCESSED--train--679971",
|
| 1095 |
+
"model_output": "No significant overlaps found."
|
| 1096 |
+
},
|
| 1097 |
+
{
|
| 1098 |
+
"audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--876910.wav",
|
| 1099 |
+
"key": "SODA_PROCESSED--train--876910",
|
| 1100 |
+
"model_output": "No significant overlaps found."
|
| 1101 |
+
},
|
| 1102 |
+
{
|
| 1103 |
+
"audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--617278.wav",
|
| 1104 |
+
"key": "SODA_PROCESSED--train--617278",
|
| 1105 |
+
"model_output": "No significant overlaps found."
|
| 1106 |
+
},
|
| 1107 |
+
{
|
| 1108 |
+
"audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--463700.wav",
|
| 1109 |
+
"key": "SODA_PROCESSED--train--463700",
|
| 1110 |
+
"model_output": "No significant overlaps found."
|
| 1111 |
+
},
|
| 1112 |
+
{
|
| 1113 |
+
"audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--1186623.wav",
|
| 1114 |
+
"key": "SODA_PROCESSED--train--1186623",
|
| 1115 |
+
"model_output": "No significant overlaps found."
|
| 1116 |
+
},
|
| 1117 |
+
{
|
| 1118 |
+
"audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--1076109.wav",
|
| 1119 |
+
"key": "SODA_PROCESSED--train--1076109",
|
| 1120 |
+
"model_output": "No significant overlaps found."
|
| 1121 |
+
},
|
| 1122 |
+
{
|
| 1123 |
+
"audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--414445.wav",
|
| 1124 |
+
"key": "SODA_PROCESSED--train--414445",
|
| 1125 |
+
"model_output": "No significant overlaps found."
|
| 1126 |
+
},
|
| 1127 |
+
{
|
| 1128 |
+
"audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--546350.wav",
|
| 1129 |
+
"key": "SODA_PROCESSED--train--546350",
|
| 1130 |
+
"model_output": "No significant overlaps found."
|
| 1131 |
+
},
|
| 1132 |
+
{
|
| 1133 |
+
"audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--1144076.wav",
|
| 1134 |
+
"key": "SODA_PROCESSED--train--1144076",
|
| 1135 |
+
"model_output": "No significant overlaps found."
|
| 1136 |
+
},
|
| 1137 |
+
{
|
| 1138 |
+
"audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--104948.wav",
|
| 1139 |
+
"key": "SODA_PROCESSED--train--104948",
|
| 1140 |
+
"model_output": "No significant overlaps found."
|
| 1141 |
+
},
|
| 1142 |
+
{
|
| 1143 |
+
"audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--836740.wav",
|
| 1144 |
+
"key": "SODA_PROCESSED--train--836740",
|
| 1145 |
+
"model_output": "No significant overlaps found."
|
| 1146 |
+
},
|
| 1147 |
+
{
|
| 1148 |
+
"audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--437951.wav",
|
| 1149 |
+
"key": "SODA_PROCESSED--train--437951",
|
| 1150 |
+
"model_output": "No significant overlaps found."
|
| 1151 |
+
}
|
| 1152 |
+
]
|
ms-swift/silence_overlaps/700/test/.ipynb_checkpoints/silence_isoverlaps_test-checkpoint.json
ADDED
|
@@ -0,0 +1,27 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[
|
| 2 |
+
{
|
| 3 |
+
"audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--484021.wav",
|
| 4 |
+
"key": "SODA_PROCESSED--train--484021",
|
| 5 |
+
"model_output": "No significant overlaps found."
|
| 6 |
+
},
|
| 7 |
+
{
|
| 8 |
+
"audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--934782.wav",
|
| 9 |
+
"key": "SODA_PROCESSED--train--934782",
|
| 10 |
+
"model_output": "No significant overlaps found."
|
| 11 |
+
},
|
| 12 |
+
{
|
| 13 |
+
"audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--1106201.wav",
|
| 14 |
+
"key": "SODA_PROCESSED--train--1106201",
|
| 15 |
+
"model_output": "No significant overlaps found."
|
| 16 |
+
},
|
| 17 |
+
{
|
| 18 |
+
"audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--1007774.wav",
|
| 19 |
+
"key": "SODA_PROCESSED--train--1007774",
|
| 20 |
+
"model_output": "No significant overlaps found."
|
| 21 |
+
},
|
| 22 |
+
{
|
| 23 |
+
"audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--708343.wav",
|
| 24 |
+
"key": "SODA_PROCESSED--train--708343",
|
| 25 |
+
"model_output": "No significant overlaps found."
|
| 26 |
+
}
|
| 27 |
+
]
|
ms-swift/silence_overlaps/700/test/overlap5s_transcriptions_test.json
ADDED
|
@@ -0,0 +1,27 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[
|
| 2 |
+
{
|
| 3 |
+
"key": "SODA_PROCESSED--train--730163",
|
| 4 |
+
"audio_url": "/root/autodl-tmp/output_overlapslong/newoverlapjson/overlap5s700/SODA_PROCESSED--train--730163.wav",
|
| 5 |
+
"model_output": "[00:00 - 00:01] Speaker A: Hey Cora, what's up?\n[00:01 - 00:04] Speaker B: Not much, just hanging out. What about you?\n[00:04 - 00:15] Speaker A: I'm good, just hanging out with some friends. You should come over if you're free. We're having a great time playing board games and catching up on all the latest news and gossip.\n[00:08 - 00:12] Speaker B: Wait, where are you guys hanging out? I might be more tempted if it's somewhere close.\n[00:15 - 00:19] Speaker A: We're at the new café downtown. You should come join us!\n[00:19 - 00:21] Speaker B: Nah, I don't really feel like it.\n[00:22 - 00:30] Speaker A: Come on, it'll be fun! I'll buy you a coffee or maybe something sweet to go with it like their famous chocolate croissants that just came out of the oven.\n[00:30 - 00:34] Speaker B: Are there a lot of people there? I kinda want to avoid a crowd.\n[00:34 - 00:39] Speaker A: It's pretty chill right now, not too crowded. So, how about that drink?\n[00:39 - 00:41] Speaker B: Alright, fine. I'll come for one drink."
|
| 6 |
+
},
|
| 7 |
+
{
|
| 8 |
+
"key": "SODA_PROCESSED--train--1180329",
|
| 9 |
+
"audio_url": "/root/autodl-tmp/output_overlapslong/newoverlapjson/overlap5s700/SODA_PROCESSED--train--1180329.wav",
|
| 10 |
+
"model_output": "[00:00 - 00:01] Speaker A: I'm sorry I failed the exam.\n[00:02 - 00:06] Speaker B: Ryann, what happened? You were doing so well in class.\n[00:06 - 00:15] Speaker A: I don't know. I just couldn't focus on the questions, even though I had studied them thoroughly and practiced similar problems multiple times before the exam.\n[00:09 - 00:15] Speaker B: Wait, when you say you couldn't focus, do you mean you were distracted by something specific, or was it just a general feeling?\n[00:16 - 00:20] Speaker A: It was just a general feeling. I couldn't focus on the test at all.\n[00:20 - 00:23] Speaker B: Did something happen that day that may have distracted you?\n[00:23 - 00:28] Speaker A: No, nothing happened. I just couldn't concentrate, no matter how hard I tried.\n[00:29 - 00:33] Speaker B: Sometimes stress or anxiety can sneak up on us. Do you think that might have been a factor?\n[00:34 - 00:39] Speaker A: Maybe, but I'm positive nothing specific happened. I just couldn't focus."
|
| 11 |
+
},
|
| 12 |
+
{
|
| 13 |
+
"key": "SODA_PROCESSED--train--366333",
|
| 14 |
+
"audio_url": "/root/autodl-tmp/output_overlapslong/newoverlapjson/overlap5s700/SODA_PROCESSED--train--366333.wav",
|
| 15 |
+
"model_output": "[00:00 - 00:15] Speaker A: Hi, I'm calling about the job opening. I saw the posting online and I'm very interested in applying for the position and learning more about the company's culture, values, and the specific challenges this role would be addressing in the current market landscape.\n[00:06 - 00:12] Speaker B: Sorry to jump in, but could you clarify which position you're referring to? We have several openings right now.\n[00:16 - 00:19] Speaker A: Oh, sure! I'm interested in the Marketing Manager position.\n[00:19 - 00:24] Speaker B: Great! We're looking for someone with your skills and experience. Tell me a little bit about yourself.\n[00:24 - 00:44] Speaker A: Well, I have a degree in marketing and I've been working in the field for about 5 years now. I've had a lot of success with the campaigns I've worked on and I think my skills would be a great fit for contributing to your company's growth and driving successful marketing strategies that align with your brand vision while adapting to emerging digital marketing trends.\n[00:45 - 00:51] Speaker B: That sounds impressive! Just to make sure, have you had experience managing a team, as this role will require leadership skills?\n[00:52 - 00:58] Speaker A: Yes, I've led a small team of 5 in my current role, and we've consistently met our targets.\n[00:58 - 01:04] Speaker B: Wow, it sounds like you would be perfect for this position! Are you available to come in for an interview tomorrow?\n[01:04 - 01:07] Speaker A: Absolutely! I'll be there at 10am.\n[01:07 - 01:08] Speaker B: Great, we'll see you then!"
|
| 16 |
+
},
|
| 17 |
+
{
|
| 18 |
+
"key": "SODA_PROCESSED--train--762905",
|
| 19 |
+
"audio_url": "/root/autodl-tmp/output_overlapslong/newoverlapjson/overlap5s700/SODA_PROCESSED--train--762905.wav",
|
| 20 |
+
"model_output": "[00:00 - 00:22] Speaker A: Sarah, I've been feeling really stuck lately. I'm not sure if I'm happy with my current situation. I'm considering a change of scenery, a new job, or even moving to a new city. What do you think about making such a big change at this point in my life? I'm particularly concerned about the financial implications and whether I'm emotionally ready for such a major transition.\n[00:12 - 00:22] Speaker B: Sorry to jump in, but when you say \"change of scenery,\" do you mean you're leaning more towards moving to a new city, or is it more about finding a new job where you are now?\n[00:23 - 00:35] Speaker A: That's a good question. Honestly, I'm not sure yet. It could be either, or maybe even both. I just feel like something needs to change. I'm not sure what exactly, but something just doesn't feel right.\n[00:35 - 00:52] Speaker B: Maybe it would help to sit down and make a list of pros and cons for each option? That way you can get all of your thoughts out on paper and see things more clearly. Plus, it might help you to talk through your options with someone else too. So if you want to bounce any ideas off me, I'm here for any brainstorming or just to listen if you need to vent.\n[00:53 - 01:02] Speaker A: Actually, that sounds perfect. I think talking it out with you could really help me figure things out. Thank you, Sarah. I really appreciate your support.\n[01:02 - 01:15] Speaker B: Absolutely! I'm always here for you, Veronika. Let me know when you want to schedule a proper brainstorming session where we can go through all your options in detail and maybe even research some potential cities or job opportunities together."
|
| 21 |
+
},
|
| 22 |
+
{
|
| 23 |
+
"key": "SODA_PROCESSED--train--836452",
|
| 24 |
+
"audio_url": "/root/autodl-tmp/output_overlapslong/newoverlapjson/overlap5s700/SODA_PROCESSED--train--836452.wav",
|
| 25 |
+
"model_output": "[00:00 - 00:01] Speaker A: So, what's the verdict?\n[00:01 - 00:20] Speaker B: Well, overall, your health is good. Your blood work looks great and your physical exam was normal. However, we did find something abnormal on your mammogram. There's a mass in your left breast that we're going to need to investigate further with additional tests including an ultrasound and possibly an MRI to get a clearer picture of what we're dealing with.\n[00:13 - 00:14] Speaker A: What does that mean?\n[00:20 - 00:25] Speaker B: It could be benign or it could be cancerous. We'll need to do a biopsy to know for sure.\n[00:25 - 00:33] Speaker A: Before we discuss the next step, I just want to express how anxious I am about this. How soon can we get the biopsy done?\n[00:34 - 00:45] Speaker B: We'll schedule you for a biopsy as soon as possible. After we get the results back, we'll know more about what treatment options are available and develop a comprehensive care plan tailored to your specific needs."
|
| 26 |
+
}
|
| 27 |
+
]
|
ms-swift/silence_overlaps/700/test/silence_isoverlaps_test.json
ADDED
|
@@ -0,0 +1,27 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[
|
| 2 |
+
{
|
| 3 |
+
"audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--484021.wav",
|
| 4 |
+
"key": "SODA_PROCESSED--train--484021",
|
| 5 |
+
"model_output": "No significant overlaps found."
|
| 6 |
+
},
|
| 7 |
+
{
|
| 8 |
+
"audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--934782.wav",
|
| 9 |
+
"key": "SODA_PROCESSED--train--934782",
|
| 10 |
+
"model_output": "No significant overlaps found."
|
| 11 |
+
},
|
| 12 |
+
{
|
| 13 |
+
"audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--1106201.wav",
|
| 14 |
+
"key": "SODA_PROCESSED--train--1106201",
|
| 15 |
+
"model_output": "No significant overlaps found."
|
| 16 |
+
},
|
| 17 |
+
{
|
| 18 |
+
"audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--1007774.wav",
|
| 19 |
+
"key": "SODA_PROCESSED--train--1007774",
|
| 20 |
+
"model_output": "No significant overlaps found."
|
| 21 |
+
},
|
| 22 |
+
{
|
| 23 |
+
"audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--708343.wav",
|
| 24 |
+
"key": "SODA_PROCESSED--train--708343",
|
| 25 |
+
"model_output": "No significant overlaps found."
|
| 26 |
+
}
|
| 27 |
+
]
|
ms-swift/silence_overlaps/700/test/silence_issilence_test.json
ADDED
|
@@ -0,0 +1,27 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[
|
| 2 |
+
{
|
| 3 |
+
"audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--84371.wav",
|
| 4 |
+
"key": "SODA_PROCESSED--train--84371",
|
| 5 |
+
"model_output": "Yes, There is a silence gap, multiple speakers were silent from 00:32 to 00:37."
|
| 6 |
+
},
|
| 7 |
+
{
|
| 8 |
+
"audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--185821.wav",
|
| 9 |
+
"key": "SODA_PROCESSED--train--185821",
|
| 10 |
+
"model_output": "Yes, There is a silence gap, multiple speakers were silent from 00:16 to 00:22."
|
| 11 |
+
},
|
| 12 |
+
{
|
| 13 |
+
"audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--908191.wav",
|
| 14 |
+
"key": "SODA_PROCESSED--train--908191",
|
| 15 |
+
"model_output": "Yes, There is a silence gap, multiple speakers were silent from 00:09 to 00:14."
|
| 16 |
+
},
|
| 17 |
+
{
|
| 18 |
+
"audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--203528.wav",
|
| 19 |
+
"key": "SODA_PROCESSED--train--203528",
|
| 20 |
+
"model_output": "Yes, There is a silence gap, multiple speakers were silent from 00:20 to 00:25."
|
| 21 |
+
},
|
| 22 |
+
{
|
| 23 |
+
"audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--1168213.wav",
|
| 24 |
+
"key": "SODA_PROCESSED--train--1168213",
|
| 25 |
+
"model_output": "Yes, There is a silence gap, multiple speakers were silent from 00:08 to 00:13."
|
| 26 |
+
}
|
| 27 |
+
]
|
ms-swift/silence_overlaps/700/train/.ipynb_checkpoints/silence_speaker_segments_train-checkpoint.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
ms-swift/silence_overlaps/700/train/silence_transcriptions_train.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
ms-swift/silence_overlaps/delete_transcript2.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
[]
|
ms-swift/silence_overlaps/only_overlap/overlap5s_isoverlap_train.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
ms-swift/silence_overlaps/overlap5s_issilence.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
ms-swift/silence_overlaps/silence_transcriptions.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
ms-swift/silence_overlaps/test/.ipynb_checkpoints/test-checkpoint.json
ADDED
|
@@ -0,0 +1,566 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"SODA_PROCESSED--train--790538": {
|
| 3 |
+
"original_text": "A: Hey Val, are you all packed for our camping trip?\nB: Yeah, I think so. I have the tents, food, and clothes. Did you remember to pack your bag?\nA: Yes, I did. I have everything we need. Are you sure you're ready for this? It's going to be a lot of [interrupt] fun, but also quite challenging with all the hiking and outdoor activities we have planned, especially since neither of us has much wilderness experience.\nB: Actually, I just realized I might have forgotten the matches. Do you have any in your bag?\nA: Oh, I didn't check for matches. I'll make sure to pack them. So, you were saying you were looking forward to it?\nB: Yeah, I'm ready. I've been looking forward to it for a while. Thanks for planning it all out.\nA: No problem. I know we both needed a break from work and life in general. It'll be nice to relax in nature for a few days and just unwind.\nB: Speaking of relaxing, did we decide on any specific activities to do while we're there?\nA: I was thinking we could go hiking and maybe do some fishing. But I haven't mentioned the exact schedule yet.\nB: Definitely. So, what time are we leaving tomorrow morning?\nA: Around 9am should be good. That will give us enough time to get there and set up camp before it gets dark.\nB: Sounds perfect. See you then!",
|
| 4 |
+
"cleaned_text": "A: Hey Val, are you all packed for our camping trip?\nB: Yeah, I think so. I have the tents, food, and clothes. Did you remember to pack your bag?\nA:Yes, I did. I have everything we need. Are you sure you're ready for this? It's going to be a lot of fun, but also quite challenging with all the hiking and outdoor activities we have planned, especially since neither of us has much wilderness experience.\nB: Actually, I just realized I might have forgotten the matches. Do you have any in your bag?\nA: Oh, I didn't check for matches. I'll make sure to pack them. So, you were saying you were looking forward to it?\nB: Yeah, I'm ready. I've been looking forward to it for a while. Thanks for planning it all out.\nA: No problem. I know we both needed a break from work and life in general. It'll be nice to relax in nature for a few days and just unwind.\nB: Speaking of relaxing, did we decide on any specific activities to do while we're there?\nA: I was thinking we could go hiking and maybe do some fishing. But I haven't mentioned the exact schedule yet.\nB: Definitely. So, what time are we leaving tomorrow morning?\nA: Around 9am should be good. That will give us enough time to get there and set up camp before it gets dark.\nB: Sounds perfect. See you then!",
|
| 5 |
+
"total_duration": 70.17238095238095,
|
| 6 |
+
"stereo_audio": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--790538/stereo_dialogue.wav",
|
| 7 |
+
"speaker_tracks": {
|
| 8 |
+
"A": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--790538/A_track.wav",
|
| 9 |
+
"B": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--790538/B_track.wav"
|
| 10 |
+
},
|
| 11 |
+
"error_type": "error_after_interrupt",
|
| 12 |
+
"segments": [
|
| 13 |
+
{
|
| 14 |
+
"speaker": "A",
|
| 15 |
+
"text": "Hey Val, are you all packed for our camping trip?",
|
| 16 |
+
"original_text": "Hey Val, are you all packed for our camping trip?",
|
| 17 |
+
"start_time": 0,
|
| 18 |
+
"end_time": 3.355283446712018,
|
| 19 |
+
"audio_file": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--790538/temp/line_0_A.wav",
|
| 20 |
+
"silence_duration": 0,
|
| 21 |
+
"is_interrupted": false
|
| 22 |
+
},
|
| 23 |
+
{
|
| 24 |
+
"speaker": "B",
|
| 25 |
+
"text": "Yeah, I think so. I have the tents, food, and clothes. Did you remember to pack your bag?",
|
| 26 |
+
"original_text": "Yeah, I think so. I have the tents, food, and clothes. Did you remember to pack your bag?",
|
| 27 |
+
"start_time": 3.7205840462203787,
|
| 28 |
+
"end_time": 8.2368652253587,
|
| 29 |
+
"audio_file": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--790538/temp/line_1_B.wav",
|
| 30 |
+
"silence_duration": 0.3653005995083607,
|
| 31 |
+
"is_interrupted": false
|
| 32 |
+
},
|
| 33 |
+
{
|
| 34 |
+
"speaker": "A",
|
| 35 |
+
"text": "Yes, I did. I have everything we need. Are you sure you're ready for this? It's going to be a lot of",
|
| 36 |
+
"original_text": "Yes, I did. I have everything we need. Are you sure you're ready for this? It's going to be a lot of [interrupt] fun, but also quite challenging with all the hiking and outdoor activities we have planned, especially since neither of us has much wilderness experience.",
|
| 37 |
+
"start_time": 8.610541776107988,
|
| 38 |
+
"end_time": 23.808002093568305,
|
| 39 |
+
"audio_file": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--790538/temp/line_2_A.wav",
|
| 40 |
+
"silence_duration": 0.37367655074928696,
|
| 41 |
+
"is_interrupted": true,
|
| 42 |
+
"text_after_interrupt": "fun, but also quite challenging with all the hiking and outdoor activities we have planned, especially since neither of us has much wilderness experience."
|
| 43 |
+
},
|
| 44 |
+
{
|
| 45 |
+
"speaker": "B",
|
| 46 |
+
"text": "Actually, I just realized I might have forgotten the matches. Do you have any in your bag?",
|
| 47 |
+
"original_text": "Actually, I just realized I might have forgotten the matches. Do you have any in your bag?",
|
| 48 |
+
"start_time": 15.588138147990074,
|
| 49 |
+
"end_time": 19.686460143454926,
|
| 50 |
+
"audio_file": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--790538/temp/line_3_B.wav",
|
| 51 |
+
"silence_duration": 0.407293268908306,
|
| 52 |
+
"is_interrupted": false
|
| 53 |
+
},
|
| 54 |
+
{
|
| 55 |
+
"speaker": "A",
|
| 56 |
+
"text": "Oh, I didn't check for matches. I'll make sure to pack them. So, you were saying you were looking forward to it?",
|
| 57 |
+
"original_text": "Oh, I didn't check for matches. I'll make sure to pack them. So, you were saying you were looking forward to it?",
|
| 58 |
+
"start_time": 24.198980395182797,
|
| 59 |
+
"end_time": 30.189728694502527,
|
| 60 |
+
"audio_file": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--790538/temp/line_4_A.wav",
|
| 61 |
+
"silence_duration": 0.39097830161449093,
|
| 62 |
+
"is_interrupted": false
|
| 63 |
+
},
|
| 64 |
+
{
|
| 65 |
+
"speaker": "B",
|
| 66 |
+
"text": "Yeah, I'm ready. I've been looking forward to it for a while. Thanks for planning it all out.",
|
| 67 |
+
"original_text": "Yeah, I'm ready. I've been looking forward to it for a while. Thanks for planning it all out.",
|
| 68 |
+
"start_time": 30.59422649262823,
|
| 69 |
+
"end_time": 35.60973669670986,
|
| 70 |
+
"audio_file": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--790538/temp/line_5_B.wav",
|
| 71 |
+
"silence_duration": 0.4044977981257025,
|
| 72 |
+
"is_interrupted": false
|
| 73 |
+
},
|
| 74 |
+
{
|
| 75 |
+
"speaker": "A",
|
| 76 |
+
"text": "No problem. I know we both needed a break from work and life in general. It'll be nice to relax in nature for a few days and just unwind.",
|
| 77 |
+
"original_text": "No problem. I know we both needed a break from work and life in general. It'll be nice to relax in nature for a few days and just unwind.",
|
| 78 |
+
"start_time": 36.08564285355298,
|
| 79 |
+
"end_time": 45.52455441817884,
|
| 80 |
+
"audio_file": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--790538/temp/line_6_A.wav",
|
| 81 |
+
"silence_duration": 0.47590615684312176,
|
| 82 |
+
"is_interrupted": false
|
| 83 |
+
},
|
| 84 |
+
{
|
| 85 |
+
"speaker": "B",
|
| 86 |
+
"text": "Speaking of relaxing, did we decide on any specific activities to do while we're there?",
|
| 87 |
+
"original_text": "Speaking of relaxing, did we decide on any specific activities to do while we're there?",
|
| 88 |
+
"start_time": 45.86997259942616,
|
| 89 |
+
"end_time": 50.33981386926743,
|
| 90 |
+
"audio_file": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--790538/temp/line_7_B.wav",
|
| 91 |
+
"silence_duration": 0.34541818124732176,
|
| 92 |
+
"is_interrupted": false
|
| 93 |
+
},
|
| 94 |
+
{
|
| 95 |
+
"speaker": "A",
|
| 96 |
+
"text": "I was thinking we could go hiking and maybe do some fishing. But I haven't mentioned the exact schedule yet.",
|
| 97 |
+
"original_text": "I was thinking we could go hiking and maybe do some fishing. But I haven't mentioned the exact schedule yet.",
|
| 98 |
+
"start_time": 50.65173673057719,
|
| 99 |
+
"end_time": 56.66570498454544,
|
| 100 |
+
"audio_file": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--790538/temp/line_8_A.wav",
|
| 101 |
+
"silence_duration": 0.3119228613097595,
|
| 102 |
+
"is_interrupted": false
|
| 103 |
+
},
|
| 104 |
+
{
|
| 105 |
+
"speaker": "B",
|
| 106 |
+
"text": "Definitely. So, what time are we leaving tomorrow morning?",
|
| 107 |
+
"original_text": "Definitely. So, what time are we leaving tomorrow morning?",
|
| 108 |
+
"start_time": 57.18372257432499,
|
| 109 |
+
"end_time": 60.84086543146785,
|
| 110 |
+
"audio_file": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--790538/temp/line_9_B.wav",
|
| 111 |
+
"silence_duration": 0.5180175897795466,
|
| 112 |
+
"is_interrupted": false
|
| 113 |
+
},
|
| 114 |
+
{
|
| 115 |
+
"speaker": "A",
|
| 116 |
+
"text": "Around 9am should be good. That will give us enough time to get there and set up camp before it gets dark.",
|
| 117 |
+
"original_text": "Around 9am should be good. That will give us enough time to get there and set up camp before it gets dark.",
|
| 118 |
+
"start_time": 61.21524274027814,
|
| 119 |
+
"end_time": 67.84453979243234,
|
| 120 |
+
"audio_file": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--790538/temp/line_10_A.wav",
|
| 121 |
+
"silence_duration": 0.37437730881029474,
|
| 122 |
+
"is_interrupted": false
|
| 123 |
+
},
|
| 124 |
+
{
|
| 125 |
+
"speaker": "B",
|
| 126 |
+
"text": "Sounds perfect. See you then!",
|
| 127 |
+
"original_text": "Sounds perfect. See you then!",
|
| 128 |
+
"start_time": 68.44252417070729,
|
| 129 |
+
"end_time": 70.17241079202248,
|
| 130 |
+
"audio_file": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--790538/temp/line_11_B.wav",
|
| 131 |
+
"silence_duration": 0.5979843782749388,
|
| 132 |
+
"is_interrupted": false
|
| 133 |
+
}
|
| 134 |
+
]
|
| 135 |
+
},
|
| 136 |
+
"SODA_PROCESSED--train--123906": {
|
| 137 |
+
"original_text": "A: You know, it's really annoying when things are loose and [interrupt] they just don't stay in place like they're supposed to, especially when you're in the middle of an important task and everything keeps falling apart because of poor craftsmanship.\n\nB: Oh, absolutely! Like when you try to hang something, and it just won't stay in place. Why can't they just be tight and sturdy?\n\nA: Exactly! It drives me crazy when I see a loose screw or a crooked picture frame.\n\nB: Yeah, it's like they're just begging to be fixed. But you know, sometimes I wonder if people even notice these things or if it's just us.\n\nA: Right? And it's not like it's that hard to fix them, but people just don't bother. Or they don't have the time, or they don't know how. But it just drives me up the wall when things are out of place or not functioning properly.\n\nB: I totally get you. There's nothing worse than trying to use something and it's just not working right because someone didn't take the time to fix it properly. Speaking of which, do you think it's just laziness, or maybe they don't have the right tools?\n\nA: It could be either, honestly. But what's the point of having things if they're just going to be broken and dysfunctional?\n\nB: Yeah, it's really frustrating. Especially when it's something you need to use regularly.\n\nA: I know, I feel like I could go on about this forever. But I should probably get back to tightening this screw before it drives me even crazier.\n\nB: Yeah, you probably should. Thanks for fixing that, by the way.\n\nA: No problem. Just doing my part to keep things in working order around here.",
|
| 138 |
+
"cleaned_text": "A:You know, it's really annoying when things are loose and they just don't stay in place like they're supposed to, especially when you're in the middle of an important task and everything keeps falling apart because of poor craftsmanship.\n\nB: Oh, absolutely! Like when you try to hang something, and it just won't stay in place. Why can't they just be tight and sturdy?\n\nA: Exactly! It drives me crazy when I see a loose screw or a crooked picture frame.\n\nB: Yeah, it's like they're just begging to be fixed. But you know, sometimes I wonder if people even notice these things or if it's just us.\n\nA: Right? And it's not like it's that hard to fix them, but people just don't bother. Or they don't have the time, or they don't know how. But it just drives me up the wall when things are out of place or not functioning properly.\n\nB: I totally get you. There's nothing worse than trying to use something and it's just not working right because someone didn't take the time to fix it properly. Speaking of which, do you think it's just laziness, or maybe they don't have the right tools?\n\nA: It could be either, honestly. But what's the point of having things if they're just going to be broken and dysfunctional?\n\nB: Yeah, it's really frustrating. Especially when it's something you need to use regularly.\n\nA: I know, I feel like I could go on about this forever. But I should probably get back to tightening this screw before it drives me even crazier.\n\nB: Yeah, you probably should. Thanks for fixing that, by the way.\n\nA: No problem. Just doing my part to keep things in working order around here.",
|
| 139 |
+
"total_duration": 81.61818594104308,
|
| 140 |
+
"stereo_audio": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--123906/stereo_dialogue.wav",
|
| 141 |
+
"speaker_tracks": {
|
| 142 |
+
"A": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--123906/A_track.wav",
|
| 143 |
+
"B": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--123906/B_track.wav"
|
| 144 |
+
},
|
| 145 |
+
"error_type": "error_after_interrupt",
|
| 146 |
+
"segments": [
|
| 147 |
+
{
|
| 148 |
+
"speaker": "A",
|
| 149 |
+
"text": "You know, it's really annoying when things are loose and",
|
| 150 |
+
"original_text": "You know, it's really annoying when things are loose and [interrupt] they just don't stay in place like they're supposed to, especially when you're in the middle of an important task and everything keeps falling apart because of poor craftsmanship.",
|
| 151 |
+
"start_time": 0,
|
| 152 |
+
"end_time": 13.293424036281179,
|
| 153 |
+
"audio_file": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--123906/temp/line_0_A.wav",
|
| 154 |
+
"silence_duration": 0,
|
| 155 |
+
"is_interrupted": true,
|
| 156 |
+
"text_after_interrupt": "they just don't stay in place like they're supposed to, especially when you're in the middle of an important task and everything keeps falling apart because of poor craftsmanship."
|
| 157 |
+
},
|
| 158 |
+
{
|
| 159 |
+
"speaker": "B",
|
| 160 |
+
"text": "Oh, absolutely! Like when you try to hang something, and it just won't stay in place. Why can't they just be tight and sturdy?",
|
| 161 |
+
"original_text": "Oh, absolutely! Like when you try to hang something, and it just won't stay in place. Why can't they just be tight and sturdy?",
|
| 162 |
+
"start_time": 3.250793650793651,
|
| 163 |
+
"end_time": 9.961360544217687,
|
| 164 |
+
"audio_file": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--123906/temp/line_1_B.wav",
|
| 165 |
+
"silence_duration": 0.5919206270367532,
|
| 166 |
+
"is_interrupted": false
|
| 167 |
+
},
|
| 168 |
+
{
|
| 169 |
+
"speaker": "A",
|
| 170 |
+
"text": "Exactly! It drives me crazy when I see a loose screw or a crooked picture frame.",
|
| 171 |
+
"original_text": "Exactly! It drives me crazy when I see a loose screw or a crooked picture frame.",
|
| 172 |
+
"start_time": 13.747669310225408,
|
| 173 |
+
"end_time": 19.123088811359196,
|
| 174 |
+
"audio_file": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--123906/temp/line_2_A.wav",
|
| 175 |
+
"silence_duration": 0.4542452739442295,
|
| 176 |
+
"is_interrupted": false
|
| 177 |
+
},
|
| 178 |
+
{
|
| 179 |
+
"speaker": "B",
|
| 180 |
+
"text": "Yeah, it's like they're just begging to be fixed. But you know, sometimes I wonder if people even notice these things or if it's just us.",
|
| 181 |
+
"original_text": "Yeah, it's like they're just begging to be fixed. But you know, sometimes I wonder if people even notice these things or if it's just us.",
|
| 182 |
+
"start_time": 19.55223911029212,
|
| 183 |
+
"end_time": 26.39051575428305,
|
| 184 |
+
"audio_file": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--123906/temp/line_3_B.wav",
|
| 185 |
+
"silence_duration": 0.4291502989329248,
|
| 186 |
+
"is_interrupted": false
|
| 187 |
+
},
|
| 188 |
+
{
|
| 189 |
+
"speaker": "A",
|
| 190 |
+
"text": "Right? And it's not like it's that hard to fix them, but people just don't bother. Or they don't have the time, or they don't know how. But it just drives me up the wall when things are out of place or not functioning properly.",
|
| 191 |
+
"original_text": "Right? And it's not like it's that hard to fix them, but people just don't bother. Or they don't have the time, or they don't know how. But it just drives me up the wall when things are out of place or not functioning properly.",
|
| 192 |
+
"start_time": 26.849330263466445,
|
| 193 |
+
"end_time": 39.388105773670524,
|
| 194 |
+
"audio_file": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--123906/temp/line_4_A.wav",
|
| 195 |
+
"silence_duration": 0.45881450918339584,
|
| 196 |
+
"is_interrupted": false
|
| 197 |
+
},
|
| 198 |
+
{
|
| 199 |
+
"speaker": "B",
|
| 200 |
+
"text": "I totally get you. There's nothing worse than trying to use something and it's just not working right because someone didn't take the time to fix it properly. Speaking of which, do you think it's just laziness, or maybe they don't have the right tools?",
|
| 201 |
+
"original_text": "I totally get you. There's nothing worse than trying to use something and it's just not working right because someone didn't take the time to fix it properly. Speaking of which, do you think it's just laziness, or maybe they don't have the right tools?",
|
| 202 |
+
"start_time": 39.82405615117125,
|
| 203 |
+
"end_time": 51.73589288586513,
|
| 204 |
+
"audio_file": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--123906/temp/line_5_B.wav",
|
| 205 |
+
"silence_duration": 0.4359503775007275,
|
| 206 |
+
"is_interrupted": false
|
| 207 |
+
},
|
| 208 |
+
{
|
| 209 |
+
"speaker": "A",
|
| 210 |
+
"text": "It could be either, honestly. But what's the point of having things if they're just going to be broken and dysfunctional?",
|
| 211 |
+
"original_text": "It could be either, honestly. But what's the point of having things if they're just going to be broken and dysfunctional?",
|
| 212 |
+
"start_time": 52.27556460896347,
|
| 213 |
+
"end_time": 58.811981842523565,
|
| 214 |
+
"audio_file": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--123906/temp/line_6_A.wav",
|
| 215 |
+
"silence_duration": 0.5396717230983441,
|
| 216 |
+
"is_interrupted": false
|
| 217 |
+
},
|
| 218 |
+
{
|
| 219 |
+
"speaker": "B",
|
| 220 |
+
"text": "Yeah, it's really frustrating. Especially when it's something you need to use regularly.",
|
| 221 |
+
"original_text": "Yeah, it's really frustrating. Especially when it's something you need to use regularly.",
|
| 222 |
+
"start_time": 59.3890926661041,
|
| 223 |
+
"end_time": 63.90537384524242,
|
| 224 |
+
"audio_file": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--123906/temp/line_7_B.wav",
|
| 225 |
+
"silence_duration": 0.5771108235805331,
|
| 226 |
+
"is_interrupted": false
|
| 227 |
+
},
|
| 228 |
+
{
|
| 229 |
+
"speaker": "A",
|
| 230 |
+
"text": "I know, I feel like I could go on about this forever. But I should probably get back to tightening this screw before it drives me even crazier.",
|
| 231 |
+
"original_text": "I know, I feel like I could go on about this forever. But I should probably get back to tightening this screw before it drives me even crazier.",
|
| 232 |
+
"start_time": 64.32218282146329,
|
| 233 |
+
"end_time": 72.3911170618261,
|
| 234 |
+
"audio_file": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--123906/temp/line_8_A.wav",
|
| 235 |
+
"silence_duration": 0.4168089762208619,
|
| 236 |
+
"is_interrupted": false
|
| 237 |
+
},
|
| 238 |
+
{
|
| 239 |
+
"speaker": "B",
|
| 240 |
+
"text": "Yeah, you probably should. Thanks for fixing that, by the way.",
|
| 241 |
+
"original_text": "Yeah, you probably should. Thanks for fixing that, by the way.",
|
| 242 |
+
"start_time": 72.7979076035892,
|
| 243 |
+
"end_time": 76.45505046073205,
|
| 244 |
+
"audio_file": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--123906/temp/line_9_B.wav",
|
| 245 |
+
"silence_duration": 0.4067905417630988,
|
| 246 |
+
"is_interrupted": false
|
| 247 |
+
},
|
| 248 |
+
{
|
| 249 |
+
"speaker": "A",
|
| 250 |
+
"text": "No problem. Just doing my part to keep things in working order around here.",
|
| 251 |
+
"original_text": "No problem. Just doing my part to keep things in working order around here.",
|
| 252 |
+
"start_time": 76.90457674986,
|
| 253 |
+
"end_time": 81.6182275435108,
|
| 254 |
+
"audio_file": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--123906/temp/line_10_A.wav",
|
| 255 |
+
"silence_duration": 0.4495262891279488,
|
| 256 |
+
"is_interrupted": false
|
| 257 |
+
}
|
| 258 |
+
]
|
| 259 |
+
},
|
| 260 |
+
"SODA_PROCESSED--train--1112763": {
|
| 261 |
+
"original_dialog_id": "",
|
| 262 |
+
"dialog_index": 1112763,
|
| 263 |
+
"processed_dialogue": "A: Hey Sarah, what's up? \nB: Not much, what are you up to? \nA: Just standing in this pool of water, trying to escape the heat. It's really hot out [interrupt] and I was thinking about just staying in here for the rest of the day because the water is perfectly refreshing and the shade from these trees makes it even more comfortable. \nB: Sorry to cut in, but how hot is it exactly? I was thinking of going for a run later, but now I'm not so sure. \nA: Yeah, it's probably not the best idea to run today. It's around 95 degrees, and it feels even hotter with the humidity. The pool feels really good and it's helping me cool down especially since there's a slight breeze too that makes the whole experience feel like a mini vacation from this brutal summer weather. \nB: That sounds nice. I wish I could be in a pool right now too. \nA: Yeah, it's pretty great. I feel like I could stay here all day without getting bored because there's so much to enjoy - watching the clouds drift by, listening to the birds chirping, and feeling the gentle movement of the water around me. \nB: I bet. It must be really nice to just relax in the water and not have to worry about anything else. \nA: Yeah, it is. It's really peaceful and calming. I'm glad I decided to come out here and do this instead of staying indoors where I would just be sweating and miserable, staring at the same four walls all afternoon. \nB: Me too, Cleveland. Me too.",
|
| 264 |
+
"clean_dialogue": "A: Hey Sarah, what's up? \nB: Not much, what are you up to? \nA:Just standing in this pool of water, trying to escape the heat. It's really hot out and I was thinking about just staying in here for the rest of the day because the water is perfectly refreshing and the shade from these trees makes it even more comfortable.\nB: Sorry to cut in, but how hot is it exactly? I was thinking of going for a run later, but now I'm not so sure. \nA: Yeah, it's probably not the best idea to run today. It's around 95 degrees, and it feels even hotter with the humidity. The pool feels really good and it's helping me cool down especially since there's a slight breeze too that makes the whole experience feel like a mini vacation from this brutal summer weather. \nB: That sounds nice. I wish I could be in a pool right now too. \nA: Yeah, it's pretty great. I feel like I could stay here all day without getting bored because there's so much to enjoy - watching the clouds drift by, listening to the birds chirping, and feeling the gentle movement of the water around me. \nB: I bet. It must be really nice to just relax in the water and not have to worry about anything else. \nA: Yeah, it is. It's really peaceful and calming. I'm glad I decided to come out here and do this instead of staying indoors where I would just be sweating and miserable, staring at the same four walls all afternoon. \nB: Me too, Cleveland. Me too.",
|
| 265 |
+
"speaker_tracks": {
|
| 266 |
+
"A": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--1112763/A_track.wav",
|
| 267 |
+
"B": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--1112763/B_track.wav"
|
| 268 |
+
},
|
| 269 |
+
"error_type": "error_after_interrupt",
|
| 270 |
+
"stereo_audio": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--1112763/stereo_dialogue.wav",
|
| 271 |
+
"total_duration": 80.37741496598639,
|
| 272 |
+
"segments": [
|
| 273 |
+
{
|
| 274 |
+
"speaker": "A",
|
| 275 |
+
"text": "Hey Sarah, what's up?",
|
| 276 |
+
"original_text": "Hey Sarah, what's up?",
|
| 277 |
+
"start_time": 0,
|
| 278 |
+
"end_time": 1.6486167800453515,
|
| 279 |
+
"audio_file": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--1112763/temp/line_0_A.wav",
|
| 280 |
+
"silence_duration": 0,
|
| 281 |
+
"is_interrupted": false
|
| 282 |
+
},
|
| 283 |
+
{
|
| 284 |
+
"speaker": "B",
|
| 285 |
+
"text": "Not much, what are you up to?",
|
| 286 |
+
"original_text": "Not much, what are you up to?",
|
| 287 |
+
"start_time": 2.244027328872719,
|
| 288 |
+
"end_time": 3.497904879893127,
|
| 289 |
+
"audio_file": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--1112763/temp/line_1_B.wav",
|
| 290 |
+
"silence_duration": 0.5954105488273673,
|
| 291 |
+
"is_interrupted": false
|
| 292 |
+
},
|
| 293 |
+
{
|
| 294 |
+
"speaker": "A",
|
| 295 |
+
"text": "Just standing in this pool of water, trying to escape the heat. It's really hot out",
|
| 296 |
+
"original_text": "Just standing in this pool of water, trying to escape the heat. It's really hot out [interrupt] and I was thinking about just staying in here for the rest of the day because the water is perfectly refreshing and the shade from these trees makes it even more comfortable.",
|
| 297 |
+
"start_time": 3.9748179407694604,
|
| 298 |
+
"end_time": 18.847198893150413,
|
| 299 |
+
"audio_file": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--1112763/temp/line_2_A.wav",
|
| 300 |
+
"silence_duration": 0.47691306087633345,
|
| 301 |
+
"is_interrupted": true,
|
| 302 |
+
"text_after_interrupt": "and I was thinking about just staying in here for the rest of the day because the water is perfectly refreshing and the shade from these trees makes it even more comfortable."
|
| 303 |
+
},
|
| 304 |
+
{
|
| 305 |
+
"speaker": "B",
|
| 306 |
+
"text": "Sorry to cut in, but how hot is it exactly? I was thinking of going for a run later, but now I'm not so sure.",
|
| 307 |
+
"original_text": "Sorry to cut in, but how hot is it exactly? I was thinking of going for a run later, but now I'm not so sure.",
|
| 308 |
+
"start_time": 9.09481794076946,
|
| 309 |
+
"end_time": 15.352595718547239,
|
| 310 |
+
"audio_file": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--1112763/temp/line_3_B.wav",
|
| 311 |
+
"silence_duration": 0.5962546040921268,
|
| 312 |
+
"is_interrupted": false
|
| 313 |
+
},
|
| 314 |
+
{
|
| 315 |
+
"speaker": "A",
|
| 316 |
+
"text": "Yeah, it's probably not the best idea to run today. It's around 95 degrees, and it feels even hotter with the humidity. The pool feels really good and it's helping me cool down especially since there's a slight breeze too that makes the whole experience feel like a mini vacation from this brutal summer weather.",
|
| 317 |
+
"original_text": "Yeah, it's probably not the best idea to run today. It's around 95 degrees, and it feels even hotter with the humidity. The pool feels really good and it's helping me cool down especially since there's a slight breeze too that makes the whole experience feel like a mini vacation from this brutal summer weather.",
|
| 318 |
+
"start_time": 19.411937689342565,
|
| 319 |
+
"end_time": 39.3462687551022,
|
| 320 |
+
"audio_file": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--1112763/temp/line_4_A.wav",
|
| 321 |
+
"silence_duration": 0.5647387961921536,
|
| 322 |
+
"is_interrupted": false
|
| 323 |
+
},
|
| 324 |
+
{
|
| 325 |
+
"speaker": "B",
|
| 326 |
+
"text": "That sounds nice. I wish I could be in a pool right now too.",
|
| 327 |
+
"original_text": "That sounds nice. I wish I could be in a pool right now too.",
|
| 328 |
+
"start_time": 39.7352726618639,
|
| 329 |
+
"end_time": 42.986066312657556,
|
| 330 |
+
"audio_file": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--1112763/temp/line_5_B.wav",
|
| 331 |
+
"silence_duration": 0.3890039067616998,
|
| 332 |
+
"is_interrupted": false
|
| 333 |
+
},
|
| 334 |
+
{
|
| 335 |
+
"speaker": "A",
|
| 336 |
+
"text": "Yeah, it's pretty great. I feel like I could stay here all day without getting bored because there's so much to enjoy - watching the clouds drift by, listening to the birds chirping, and feeling the gentle movement of the water around me.",
|
| 337 |
+
"original_text": "Yeah, it's pretty great. I feel like I could stay here all day without getting bored because there's so much to enjoy - watching the clouds drift by, listening to the birds chirping, and feeling the gentle movement of the water around me.",
|
| 338 |
+
"start_time": 43.57081570786076,
|
| 339 |
+
"end_time": 58.803105957293866,
|
| 340 |
+
"audio_file": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--1112763/temp/line_6_A.wav",
|
| 341 |
+
"silence_duration": 0.5847493952032004,
|
| 342 |
+
"is_interrupted": false
|
| 343 |
+
},
|
| 344 |
+
{
|
| 345 |
+
"speaker": "B",
|
| 346 |
+
"text": "I bet. It must be really nice to just relax in the water and not have to worry about anything else.",
|
| 347 |
+
"original_text": "I bet. It must be really nice to just relax in the water and not have to worry about anything else.",
|
| 348 |
+
"start_time": 59.111149637610815,
|
| 349 |
+
"end_time": 63.94090020450424,
|
| 350 |
+
"audio_file": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--1112763/temp/line_7_B.wav",
|
| 351 |
+
"silence_duration": 0.3080436803169506,
|
| 352 |
+
"is_interrupted": false
|
| 353 |
+
},
|
| 354 |
+
{
|
| 355 |
+
"speaker": "A",
|
| 356 |
+
"text": "Yeah, it is. It's really peaceful and calming. I'm glad I decided to come out here and do this instead of staying indoors where I would just be sweating and miserable, staring at the same four walls all afternoon.",
|
| 357 |
+
"original_text": "Yeah, it is. It's really peaceful and calming. I'm glad I decided to come out here and do this instead of staying indoors where I would just be sweating and miserable, staring at the same four walls all afternoon.",
|
| 358 |
+
"start_time": 64.4324265657298,
|
| 359 |
+
"end_time": 78.10897985371166,
|
| 360 |
+
"audio_file": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--1112763/temp/line_8_A.wav",
|
| 361 |
+
"silence_duration": 0.4915263612255627,
|
| 362 |
+
"is_interrupted": false
|
| 363 |
+
},
|
| 364 |
+
{
|
| 365 |
+
"speaker": "B",
|
| 366 |
+
"text": "Me too, Cleveland. Me too.",
|
| 367 |
+
"original_text": "Me too, Cleveland. Me too.",
|
| 368 |
+
"start_time": 78.5430769468726,
|
| 369 |
+
"end_time": 80.37745336410616,
|
| 370 |
+
"audio_file": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--1112763/temp/line_9_B.wav",
|
| 371 |
+
"silence_duration": 0.4340970931609471,
|
| 372 |
+
"is_interrupted": false
|
| 373 |
+
}
|
| 374 |
+
]
|
| 375 |
+
},
|
| 376 |
+
"SODA_PROCESSED--train--822773": {
|
| 377 |
+
"original_text": "A: Hey, Sarah! I'm so glad to see you!\nB: Hey, Renesmee! I'm happy to see you too! How are you doing today?\nA: I'm doing great today! I love exploring and playing games. Do you want to play a game of fetch with [interrupt] me in the park later? We could bring some tennis balls and maybe even invite some other friends to join us for an afternoon of fun activities.\nB: Oh, fetch sounds fun! But I was wondering, do you have any other favorite games we could try later?\nA: That sounds like fun! I would love to play a game of fetch with you. Maybe after that, we can try some other games too. Alright, let's go!\nB: Absolutely, let's start with fetch and then we can see what else we feel like doing!\nA: Renesmee and Sarah seem to be having a great time playing fetch together. Sarah is laughing and Renesmee looks like he's having a blast running around.",
|
| 378 |
+
"cleaned_text": "A: Hey, Sarah! I'm so glad to see you!\nB: Hey, Renesmee! I'm happy to see you too! How are you doing today?\nA:I'm doing great today! I love exploring and playing games. Do you want to play a game of fetch with me in the park later? We could bring some tennis balls and maybe even invite some other friends to join us for an afternoon of fun activities.\nB: Oh, fetch sounds fun! But I was wondering, do you have any other favorite games we could try later?\nA: That sounds like fun! I would love to play a game of fetch with you. Maybe after that, we can try some other games too. Alright, let's go!\nB: Absolutely, let's start with fetch and then we can see what else we feel like doing!\nA: Renesmee and Sarah seem to be having a great time playing fetch together. Sarah is laughing and Renesmee looks like he's having a blast running around.",
|
| 379 |
+
"total_duration": 45.90185941043084,
|
| 380 |
+
"stereo_audio": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--822773/stereo_dialogue.wav",
|
| 381 |
+
"speaker_tracks": {
|
| 382 |
+
"A": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--822773/A_track.wav",
|
| 383 |
+
"B": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--822773/B_track.wav"
|
| 384 |
+
},
|
| 385 |
+
"error_type": "error_after_interrupt",
|
| 386 |
+
"segments": [
|
| 387 |
+
{
|
| 388 |
+
"speaker": "A",
|
| 389 |
+
"text": "Hey, Sarah! I'm so glad to see you!",
|
| 390 |
+
"original_text": "Hey, Sarah! I'm so glad to see you!",
|
| 391 |
+
"start_time": 0,
|
| 392 |
+
"end_time": 2.7747845804988662,
|
| 393 |
+
"audio_file": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--822773/temp/line_0_A.wav",
|
| 394 |
+
"silence_duration": 0,
|
| 395 |
+
"is_interrupted": false
|
| 396 |
+
},
|
| 397 |
+
{
|
| 398 |
+
"speaker": "B",
|
| 399 |
+
"text": "Hey, Renesmee! I'm happy to see you too! How are you doing today?",
|
| 400 |
+
"original_text": "Hey, Renesmee! I'm happy to see you too! How are you doing today?",
|
| 401 |
+
"start_time": 3.1698846088737547,
|
| 402 |
+
"end_time": 7.268206604338607,
|
| 403 |
+
"audio_file": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--822773/temp/line_1_B.wav",
|
| 404 |
+
"silence_duration": 0.39510002837488867,
|
| 405 |
+
"is_interrupted": false
|
| 406 |
+
},
|
| 407 |
+
{
|
| 408 |
+
"speaker": "A",
|
| 409 |
+
"text": "I'm doing great today! I love exploring and playing games. Do you want to play a game of fetch with",
|
| 410 |
+
"original_text": "I'm doing great today! I love exploring and playing games. Do you want to play a game of fetch with [interrupt] me in the park later? We could bring some tennis balls and maybe even invite some other friends to join us for an afternoon of fun activities.",
|
| 411 |
+
"start_time": 7.859956363848243,
|
| 412 |
+
"end_time": 22.209888336637356,
|
| 413 |
+
"audio_file": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--822773/temp/line_2_A.wav",
|
| 414 |
+
"silence_duration": 0.5917497595096354,
|
| 415 |
+
"is_interrupted": true,
|
| 416 |
+
"text_after_interrupt": "me in the park later? We could bring some tennis balls and maybe even invite some other friends to join us for an afternoon of fun activities."
|
| 417 |
+
},
|
| 418 |
+
{
|
| 419 |
+
"speaker": "B",
|
| 420 |
+
"text": "Oh, fetch sounds fun! But I was wondering, do you have any other favorite games we could try later?",
|
| 421 |
+
"original_text": "Oh, fetch sounds fun! But I was wondering, do you have any other favorite games we could try later?",
|
| 422 |
+
"start_time": 14.558913279948015,
|
| 423 |
+
"end_time": 19.574423484029648,
|
| 424 |
+
"audio_file": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--822773/temp/line_3_B.wav",
|
| 425 |
+
"silence_duration": 0.35472772157833465,
|
| 426 |
+
"is_interrupted": false
|
| 427 |
+
},
|
| 428 |
+
{
|
| 429 |
+
"speaker": "A",
|
| 430 |
+
"text": "That sounds like fun! I would love to play a game of fetch with you. Maybe after that, we can try some other games too. Alright, let's go!",
|
| 431 |
+
"original_text": "That sounds like fun! I would love to play a game of fetch with you. Maybe after that, we can try some other games too. Alright, let's go!",
|
| 432 |
+
"start_time": 22.580509090185526,
|
| 433 |
+
"end_time": 31.798831085650377,
|
| 434 |
+
"audio_file": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--822773/temp/line_4_A.wav",
|
| 435 |
+
"silence_duration": 0.3706207535481718,
|
| 436 |
+
"is_interrupted": false
|
| 437 |
+
},
|
| 438 |
+
{
|
| 439 |
+
"speaker": "B",
|
| 440 |
+
"text": "Absolutely, let's start with fetch and then we can see what else we feel like doing!",
|
| 441 |
+
"original_text": "Absolutely, let's start with fetch and then we can see what else we feel like doing!",
|
| 442 |
+
"start_time": 32.37886103861385,
|
| 443 |
+
"end_time": 36.53523292070002,
|
| 444 |
+
"audio_file": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--822773/temp/line_5_B.wav",
|
| 445 |
+
"silence_duration": 0.5800299529634694,
|
| 446 |
+
"is_interrupted": false
|
| 447 |
+
},
|
| 448 |
+
{
|
| 449 |
+
"speaker": "A",
|
| 450 |
+
"text": "Renesmee and Sarah seem to be having a great time playing fetch together. Sarah is laughing and Renesmee looks like he's having a blast running around.",
|
| 451 |
+
"original_text": "Renesmee and Sarah seem to be having a great time playing fetch together. Sarah is laughing and Renesmee looks like he's having a blast running around.",
|
| 452 |
+
"start_time": 36.96218842583218,
|
| 453 |
+
"end_time": 45.90187096551472,
|
| 454 |
+
"audio_file": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--822773/temp/line_6_A.wav",
|
| 455 |
+
"silence_duration": 0.4269555051321636,
|
| 456 |
+
"is_interrupted": false
|
| 457 |
+
}
|
| 458 |
+
]
|
| 459 |
+
},
|
| 460 |
+
"SODA_PROCESSED--train--424960": {
|
| 461 |
+
"original_dialog_id": "",
|
| 462 |
+
"dialog_index": 424960,
|
| 463 |
+
"processed_dialogue": "A: So, you've traveled a lot? \nB: Yes, I have. I've been to all sorts of interesting places, from tropical islands to snowy mountain ranges. \nA: Sorry to interrupt, but when you say \"all sorts of places,\" do you mean different countries or also different kinds of environments, like cities versus rural areas? \nB: Both, actually. I've visited various countries and experienced everything from bustling cities to remote villages. It's definitely a great experience. You get to see so many different cultures and meet new [interrupt] people, each with their own unique stories and perspectives that can completely change how you view the world and your place in it. \nA: Speaking of cultures, which one left the biggest impression on you? \nB: That's a tough question, but I think visiting Japan had the most profound impact on me. The blend of tradition and modernity there is just fascinating. But I haven't even mentioned how meeting new people really enhances the whole travel experience. \nA: It must be wonderful to be able to see the world like that. \nB: It is. I highly recommend it if you ever get the chance. \nA: I'm definitely going to try to make it happen. Thank you for talking with me about it.",
|
| 464 |
+
"clean_dialogue": "A: So, you've traveled a lot? \nB: Yes, I have. I've been to all sorts of interesting places, from tropical islands to snowy mountain ranges. \nA: Sorry to interrupt, but when you say \"all sorts of places,\" do you mean different countries or also different kinds of environments, like cities versus rural areas? \nB:Both, actually. I've visited various countries and experienced everything from bustling cities to remote villages. It's definitely a great experience. You get to see so many different cultures and meet new people, each with their own unique stories and perspectives that can completely change how you view the world and your place in it.\nA: Speaking of cultures, which one left the biggest impression on you? \nB: That's a tough question, but I think visiting Japan had the most profound impact on me. The blend of tradition and modernity there is just fascinating. But I haven't even mentioned how meeting new people really enhances the whole travel experience. \nA: It must be wonderful to be able to see the world like that. \nB: It is. I highly recommend it if you ever get the chance. \nA: I'm definitely going to try to make it happen. Thank you for talking with me about it.",
|
| 465 |
+
"speaker_tracks": {
|
| 466 |
+
"A": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--424960/A_track.wav",
|
| 467 |
+
"B": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--424960/B_track.wav"
|
| 468 |
+
},
|
| 469 |
+
"error_type": "error_after_interrupt",
|
| 470 |
+
"stereo_audio": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--424960/stereo_dialogue.wav",
|
| 471 |
+
"total_duration": 61.32780045351474,
|
| 472 |
+
"segments": [
|
| 473 |
+
{
|
| 474 |
+
"speaker": "A",
|
| 475 |
+
"text": "So, you've traveled a lot?",
|
| 476 |
+
"original_text": "So, you've traveled a lot?",
|
| 477 |
+
"start_time": 0,
|
| 478 |
+
"end_time": 1.474467120181406,
|
| 479 |
+
"audio_file": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--424960/temp/line_0_A.wav",
|
| 480 |
+
"silence_duration": 0,
|
| 481 |
+
"is_interrupted": false
|
| 482 |
+
},
|
| 483 |
+
{
|
| 484 |
+
"speaker": "B",
|
| 485 |
+
"text": "Yes, I have. I've been to all sorts of interesting places, from tropical islands to snowy mountain ranges.",
|
| 486 |
+
"original_text": "Yes, I have. I've been to all sorts of interesting places, from tropical islands to snowy mountain ranges.",
|
| 487 |
+
"start_time": 2.068172914767877,
|
| 488 |
+
"end_time": 7.826721667602344,
|
| 489 |
+
"audio_file": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--424960/temp/line_1_B.wav",
|
| 490 |
+
"silence_duration": 0.5937057945864714,
|
| 491 |
+
"is_interrupted": false
|
| 492 |
+
},
|
| 493 |
+
{
|
| 494 |
+
"speaker": "A",
|
| 495 |
+
"text": "Sorry to interrupt, but when you say \"all sorts of places,\" do you mean different countries or also different kinds of environments, like cities versus rural areas?",
|
| 496 |
+
"original_text": "Sorry to interrupt, but when you say \"all sorts of places,\" do you mean different countries or also different kinds of environments, like cities versus rural areas?",
|
| 497 |
+
"start_time": 8.340391189370486,
|
| 498 |
+
"end_time": 18.67327100796459,
|
| 499 |
+
"audio_file": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--424960/temp/line_2_A.wav",
|
| 500 |
+
"silence_duration": 0.5136695217681417,
|
| 501 |
+
"is_interrupted": false
|
| 502 |
+
},
|
| 503 |
+
{
|
| 504 |
+
"speaker": "B",
|
| 505 |
+
"text": "Both, actually. I've visited various countries and experienced everything from bustling cities to remote villages. It's definitely a great experience. You get to see so many different cultures and meet new",
|
| 506 |
+
"original_text": "Both, actually. I've visited various countries and experienced everything from bustling cities to remote villages. It's definitely a great experience. You get to see so many different cultures and meet new [interrupt] people, each with their own unique stories and perspectives that can completely change how you view the world and your place in it.",
|
| 507 |
+
"start_time": 19.06107440491716,
|
| 508 |
+
"end_time": 36.000031321016934,
|
| 509 |
+
"audio_file": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--424960/temp/line_3_B.wav",
|
| 510 |
+
"silence_duration": 0.38780339695257027,
|
| 511 |
+
"is_interrupted": true,
|
| 512 |
+
"text_after_interrupt": "people, each with their own unique stories and perspectives that can completely change how you view the world and your place in it."
|
| 513 |
+
},
|
| 514 |
+
{
|
| 515 |
+
"speaker": "A",
|
| 516 |
+
"text": "Speaking of cultures, which one left the biggest impression on you?",
|
| 517 |
+
"original_text": "Speaking of cultures, which one left the biggest impression on you?",
|
| 518 |
+
"start_time": 29.660983701969315,
|
| 519 |
+
"end_time": 33.329736536436435,
|
| 520 |
+
"audio_file": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--424960/temp/line_4_A.wav",
|
| 521 |
+
"silence_duration": 0.5959806021294678,
|
| 522 |
+
"is_interrupted": false
|
| 523 |
+
},
|
| 524 |
+
{
|
| 525 |
+
"speaker": "B",
|
| 526 |
+
"text": "That's a tough question, but I think visiting Japan had the most profound impact on me. The blend of tradition and modernity there is just fascinating. But I haven't even mentioned how meeting new people really enhances the whole travel experience.",
|
| 527 |
+
"original_text": "That's a tough question, but I think visiting Japan had the most profound impact on me. The blend of tradition and modernity there is just fascinating. But I haven't even mentioned how meeting new people really enhances the whole travel experience.",
|
| 528 |
+
"start_time": 36.35174674763023,
|
| 529 |
+
"end_time": 48.86730230318579,
|
| 530 |
+
"audio_file": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--424960/temp/line_5_B.wav",
|
| 531 |
+
"silence_duration": 0.3517154266132969,
|
| 532 |
+
"is_interrupted": false
|
| 533 |
+
},
|
| 534 |
+
{
|
| 535 |
+
"speaker": "A",
|
| 536 |
+
"text": "It must be wonderful to be able to see the world like that.",
|
| 537 |
+
"original_text": "It must be wonderful to be able to see the world like that.",
|
| 538 |
+
"start_time": 49.35988001043792,
|
| 539 |
+
"end_time": 52.57584372925878,
|
| 540 |
+
"audio_file": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--424960/temp/line_6_A.wav",
|
| 541 |
+
"silence_duration": 0.49257770725213246,
|
| 542 |
+
"is_interrupted": false
|
| 543 |
+
},
|
| 544 |
+
{
|
| 545 |
+
"speaker": "B",
|
| 546 |
+
"text": "It is. I highly recommend it if you ever get the chance.",
|
| 547 |
+
"original_text": "It is. I highly recommend it if you ever get the chance.",
|
| 548 |
+
"start_time": 53.04658029347679,
|
| 549 |
+
"end_time": 56.18127417102781,
|
| 550 |
+
"audio_file": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--424960/temp/line_7_B.wav",
|
| 551 |
+
"silence_duration": 0.4707365642180112,
|
| 552 |
+
"is_interrupted": false
|
| 553 |
+
},
|
| 554 |
+
{
|
| 555 |
+
"speaker": "A",
|
| 556 |
+
"text": "I'm definitely going to try to make it happen. Thank you for talking with me about it.",
|
| 557 |
+
"original_text": "I'm definitely going to try to make it happen. Thank you for talking with me about it.",
|
| 558 |
+
"start_time": 56.57932539417633,
|
| 559 |
+
"end_time": 61.32780611979991,
|
| 560 |
+
"audio_file": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--424960/temp/line_8_A.wav",
|
| 561 |
+
"silence_duration": 0.39805122314851726,
|
| 562 |
+
"is_interrupted": false
|
| 563 |
+
}
|
| 564 |
+
]
|
| 565 |
+
}
|
| 566 |
+
}
|
ms-swift/silence_overlaps/test/test.json
ADDED
|
@@ -0,0 +1,566 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"SODA_PROCESSED--train--790538": {
|
| 3 |
+
"original_text": "A: Hey Val, are you all packed for our camping trip?\nB: Yeah, I think so. I have the tents, food, and clothes. Did you remember to pack your bag?\nA: Yes, I did. I have everything we need. Are you sure you're ready for this? It's going to be a lot of [interrupt] fun, but also quite challenging with all the hiking and outdoor activities we have planned, especially since neither of us has much wilderness experience.\nB: Actually, I just realized I might have forgotten the matches. Do you have any in your bag?\nA: Oh, I didn't check for matches. I'll make sure to pack them. So, you were saying you were looking forward to it?\nB: Yeah, I'm ready. I've been looking forward to it for a while. Thanks for planning it all out.\nA: No problem. I know we both needed a break from work and life in general. It'll be nice to relax in nature for a few days and just unwind.\nB: Speaking of relaxing, did we decide on any specific activities to do while we're there?\nA: I was thinking we could go hiking and maybe do some fishing. But I haven't mentioned the exact schedule yet.\nB: Definitely. So, what time are we leaving tomorrow morning?\nA: Around 9am should be good. That will give us enough time to get there and set up camp before it gets dark.\nB: Sounds perfect. See you then!",
|
| 4 |
+
"cleaned_text": "A: Hey Val, are you all packed for our camping trip?\nB: Yeah, I think so. I have the tents, food, and clothes. Did you remember to pack your bag?\nA:Yes, I did. I have everything we need. Are you sure you're ready for this? It's going to be a lot of fun, but also quite challenging with all the hiking and outdoor activities we have planned, especially since neither of us has much wilderness experience.\nB: Actually, I just realized I might have forgotten the matches. Do you have any in your bag?\nA: Oh, I didn't check for matches. I'll make sure to pack them. So, you were saying you were looking forward to it?\nB: Yeah, I'm ready. I've been looking forward to it for a while. Thanks for planning it all out.\nA: No problem. I know we both needed a break from work and life in general. It'll be nice to relax in nature for a few days and just unwind.\nB: Speaking of relaxing, did we decide on any specific activities to do while we're there?\nA: I was thinking we could go hiking and maybe do some fishing. But I haven't mentioned the exact schedule yet.\nB: Definitely. So, what time are we leaving tomorrow morning?\nA: Around 9am should be good. That will give us enough time to get there and set up camp before it gets dark.\nB: Sounds perfect. See you then!",
|
| 5 |
+
"total_duration": 70.17238095238095,
|
| 6 |
+
"stereo_audio": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--790538/stereo_dialogue.wav",
|
| 7 |
+
"speaker_tracks": {
|
| 8 |
+
"A": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--790538/A_track.wav",
|
| 9 |
+
"B": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--790538/B_track.wav"
|
| 10 |
+
},
|
| 11 |
+
"error_type": "error_after_interrupt",
|
| 12 |
+
"segments": [
|
| 13 |
+
{
|
| 14 |
+
"speaker": "A",
|
| 15 |
+
"text": "Hey Val, are you all packed for our camping trip?",
|
| 16 |
+
"original_text": "Hey Val, are you all packed for our camping trip?",
|
| 17 |
+
"start_time": 0,
|
| 18 |
+
"end_time": 3.355283446712018,
|
| 19 |
+
"audio_file": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--790538/temp/line_0_A.wav",
|
| 20 |
+
"silence_duration": 0,
|
| 21 |
+
"is_interrupted": false
|
| 22 |
+
},
|
| 23 |
+
{
|
| 24 |
+
"speaker": "B",
|
| 25 |
+
"text": "Yeah, I think so. I have the tents, food, and clothes. Did you remember to pack your bag?",
|
| 26 |
+
"original_text": "Yeah, I think so. I have the tents, food, and clothes. Did you remember to pack your bag?",
|
| 27 |
+
"start_time": 3.7205840462203787,
|
| 28 |
+
"end_time": 8.2368652253587,
|
| 29 |
+
"audio_file": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--790538/temp/line_1_B.wav",
|
| 30 |
+
"silence_duration": 0.3653005995083607,
|
| 31 |
+
"is_interrupted": false
|
| 32 |
+
},
|
| 33 |
+
{
|
| 34 |
+
"speaker": "A",
|
| 35 |
+
"text": "Yes, I did. I have everything we need. Are you sure you're ready for this? It's going to be a lot of",
|
| 36 |
+
"original_text": "Yes, I did. I have everything we need. Are you sure you're ready for this? It's going to be a lot of [interrupt] fun, but also quite challenging with all the hiking and outdoor activities we have planned, especially since neither of us has much wilderness experience.",
|
| 37 |
+
"start_time": 8.610541776107988,
|
| 38 |
+
"end_time": 23.808002093568305,
|
| 39 |
+
"audio_file": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--790538/temp/line_2_A.wav",
|
| 40 |
+
"silence_duration": 0.37367655074928696,
|
| 41 |
+
"is_interrupted": true,
|
| 42 |
+
"text_after_interrupt": "fun, but also quite challenging with all the hiking and outdoor activities we have planned, especially since neither of us has much wilderness experience."
|
| 43 |
+
},
|
| 44 |
+
{
|
| 45 |
+
"speaker": "B",
|
| 46 |
+
"text": "Actually, I just realized I might have forgotten the matches. Do you have any in your bag?",
|
| 47 |
+
"original_text": "Actually, I just realized I might have forgotten the matches. Do you have any in your bag?",
|
| 48 |
+
"start_time": 15.588138147990074,
|
| 49 |
+
"end_time": 19.686460143454926,
|
| 50 |
+
"audio_file": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--790538/temp/line_3_B.wav",
|
| 51 |
+
"silence_duration": 0.407293268908306,
|
| 52 |
+
"is_interrupted": false
|
| 53 |
+
},
|
| 54 |
+
{
|
| 55 |
+
"speaker": "A",
|
| 56 |
+
"text": "Oh, I didn't check for matches. I'll make sure to pack them. So, you were saying you were looking forward to it?",
|
| 57 |
+
"original_text": "Oh, I didn't check for matches. I'll make sure to pack them. So, you were saying you were looking forward to it?",
|
| 58 |
+
"start_time": 24.198980395182797,
|
| 59 |
+
"end_time": 30.189728694502527,
|
| 60 |
+
"audio_file": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--790538/temp/line_4_A.wav",
|
| 61 |
+
"silence_duration": 0.39097830161449093,
|
| 62 |
+
"is_interrupted": false
|
| 63 |
+
},
|
| 64 |
+
{
|
| 65 |
+
"speaker": "B",
|
| 66 |
+
"text": "Yeah, I'm ready. I've been looking forward to it for a while. Thanks for planning it all out.",
|
| 67 |
+
"original_text": "Yeah, I'm ready. I've been looking forward to it for a while. Thanks for planning it all out.",
|
| 68 |
+
"start_time": 30.59422649262823,
|
| 69 |
+
"end_time": 35.60973669670986,
|
| 70 |
+
"audio_file": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--790538/temp/line_5_B.wav",
|
| 71 |
+
"silence_duration": 0.4044977981257025,
|
| 72 |
+
"is_interrupted": false
|
| 73 |
+
},
|
| 74 |
+
{
|
| 75 |
+
"speaker": "A",
|
| 76 |
+
"text": "No problem. I know we both needed a break from work and life in general. It'll be nice to relax in nature for a few days and just unwind.",
|
| 77 |
+
"original_text": "No problem. I know we both needed a break from work and life in general. It'll be nice to relax in nature for a few days and just unwind.",
|
| 78 |
+
"start_time": 36.08564285355298,
|
| 79 |
+
"end_time": 45.52455441817884,
|
| 80 |
+
"audio_file": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--790538/temp/line_6_A.wav",
|
| 81 |
+
"silence_duration": 0.47590615684312176,
|
| 82 |
+
"is_interrupted": false
|
| 83 |
+
},
|
| 84 |
+
{
|
| 85 |
+
"speaker": "B",
|
| 86 |
+
"text": "Speaking of relaxing, did we decide on any specific activities to do while we're there?",
|
| 87 |
+
"original_text": "Speaking of relaxing, did we decide on any specific activities to do while we're there?",
|
| 88 |
+
"start_time": 45.86997259942616,
|
| 89 |
+
"end_time": 50.33981386926743,
|
| 90 |
+
"audio_file": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--790538/temp/line_7_B.wav",
|
| 91 |
+
"silence_duration": 0.34541818124732176,
|
| 92 |
+
"is_interrupted": false
|
| 93 |
+
},
|
| 94 |
+
{
|
| 95 |
+
"speaker": "A",
|
| 96 |
+
"text": "I was thinking we could go hiking and maybe do some fishing. But I haven't mentioned the exact schedule yet.",
|
| 97 |
+
"original_text": "I was thinking we could go hiking and maybe do some fishing. But I haven't mentioned the exact schedule yet.",
|
| 98 |
+
"start_time": 50.65173673057719,
|
| 99 |
+
"end_time": 56.66570498454544,
|
| 100 |
+
"audio_file": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--790538/temp/line_8_A.wav",
|
| 101 |
+
"silence_duration": 0.3119228613097595,
|
| 102 |
+
"is_interrupted": false
|
| 103 |
+
},
|
| 104 |
+
{
|
| 105 |
+
"speaker": "B",
|
| 106 |
+
"text": "Definitely. So, what time are we leaving tomorrow morning?",
|
| 107 |
+
"original_text": "Definitely. So, what time are we leaving tomorrow morning?",
|
| 108 |
+
"start_time": 57.18372257432499,
|
| 109 |
+
"end_time": 60.84086543146785,
|
| 110 |
+
"audio_file": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--790538/temp/line_9_B.wav",
|
| 111 |
+
"silence_duration": 0.5180175897795466,
|
| 112 |
+
"is_interrupted": false
|
| 113 |
+
},
|
| 114 |
+
{
|
| 115 |
+
"speaker": "A",
|
| 116 |
+
"text": "Around 9am should be good. That will give us enough time to get there and set up camp before it gets dark.",
|
| 117 |
+
"original_text": "Around 9am should be good. That will give us enough time to get there and set up camp before it gets dark.",
|
| 118 |
+
"start_time": 61.21524274027814,
|
| 119 |
+
"end_time": 67.84453979243234,
|
| 120 |
+
"audio_file": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--790538/temp/line_10_A.wav",
|
| 121 |
+
"silence_duration": 0.37437730881029474,
|
| 122 |
+
"is_interrupted": false
|
| 123 |
+
},
|
| 124 |
+
{
|
| 125 |
+
"speaker": "B",
|
| 126 |
+
"text": "Sounds perfect. See you then!",
|
| 127 |
+
"original_text": "Sounds perfect. See you then!",
|
| 128 |
+
"start_time": 68.44252417070729,
|
| 129 |
+
"end_time": 70.17241079202248,
|
| 130 |
+
"audio_file": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--790538/temp/line_11_B.wav",
|
| 131 |
+
"silence_duration": 0.5979843782749388,
|
| 132 |
+
"is_interrupted": false
|
| 133 |
+
}
|
| 134 |
+
]
|
| 135 |
+
},
|
| 136 |
+
"SODA_PROCESSED--train--123906": {
|
| 137 |
+
"original_text": "A: You know, it's really annoying when things are loose and [interrupt] they just don't stay in place like they're supposed to, especially when you're in the middle of an important task and everything keeps falling apart because of poor craftsmanship.\n\nB: Oh, absolutely! Like when you try to hang something, and it just won't stay in place. Why can't they just be tight and sturdy?\n\nA: Exactly! It drives me crazy when I see a loose screw or a crooked picture frame.\n\nB: Yeah, it's like they're just begging to be fixed. But you know, sometimes I wonder if people even notice these things or if it's just us.\n\nA: Right? And it's not like it's that hard to fix them, but people just don't bother. Or they don't have the time, or they don't know how. But it just drives me up the wall when things are out of place or not functioning properly.\n\nB: I totally get you. There's nothing worse than trying to use something and it's just not working right because someone didn't take the time to fix it properly. Speaking of which, do you think it's just laziness, or maybe they don't have the right tools?\n\nA: It could be either, honestly. But what's the point of having things if they're just going to be broken and dysfunctional?\n\nB: Yeah, it's really frustrating. Especially when it's something you need to use regularly.\n\nA: I know, I feel like I could go on about this forever. But I should probably get back to tightening this screw before it drives me even crazier.\n\nB: Yeah, you probably should. Thanks for fixing that, by the way.\n\nA: No problem. Just doing my part to keep things in working order around here.",
|
| 138 |
+
"cleaned_text": "A:You know, it's really annoying when things are loose and they just don't stay in place like they're supposed to, especially when you're in the middle of an important task and everything keeps falling apart because of poor craftsmanship.\n\nB: Oh, absolutely! Like when you try to hang something, and it just won't stay in place. Why can't they just be tight and sturdy?\n\nA: Exactly! It drives me crazy when I see a loose screw or a crooked picture frame.\n\nB: Yeah, it's like they're just begging to be fixed. But you know, sometimes I wonder if people even notice these things or if it's just us.\n\nA: Right? And it's not like it's that hard to fix them, but people just don't bother. Or they don't have the time, or they don't know how. But it just drives me up the wall when things are out of place or not functioning properly.\n\nB: I totally get you. There's nothing worse than trying to use something and it's just not working right because someone didn't take the time to fix it properly. Speaking of which, do you think it's just laziness, or maybe they don't have the right tools?\n\nA: It could be either, honestly. But what's the point of having things if they're just going to be broken and dysfunctional?\n\nB: Yeah, it's really frustrating. Especially when it's something you need to use regularly.\n\nA: I know, I feel like I could go on about this forever. But I should probably get back to tightening this screw before it drives me even crazier.\n\nB: Yeah, you probably should. Thanks for fixing that, by the way.\n\nA: No problem. Just doing my part to keep things in working order around here.",
|
| 139 |
+
"total_duration": 81.61818594104308,
|
| 140 |
+
"stereo_audio": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--123906/stereo_dialogue.wav",
|
| 141 |
+
"speaker_tracks": {
|
| 142 |
+
"A": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--123906/A_track.wav",
|
| 143 |
+
"B": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--123906/B_track.wav"
|
| 144 |
+
},
|
| 145 |
+
"error_type": "error_after_interrupt",
|
| 146 |
+
"segments": [
|
| 147 |
+
{
|
| 148 |
+
"speaker": "A",
|
| 149 |
+
"text": "You know, it's really annoying when things are loose and",
|
| 150 |
+
"original_text": "You know, it's really annoying when things are loose and [interrupt] they just don't stay in place like they're supposed to, especially when you're in the middle of an important task and everything keeps falling apart because of poor craftsmanship.",
|
| 151 |
+
"start_time": 0,
|
| 152 |
+
"end_time": 13.293424036281179,
|
| 153 |
+
"audio_file": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--123906/temp/line_0_A.wav",
|
| 154 |
+
"silence_duration": 0,
|
| 155 |
+
"is_interrupted": true,
|
| 156 |
+
"text_after_interrupt": "they just don't stay in place like they're supposed to, especially when you're in the middle of an important task and everything keeps falling apart because of poor craftsmanship."
|
| 157 |
+
},
|
| 158 |
+
{
|
| 159 |
+
"speaker": "B",
|
| 160 |
+
"text": "Oh, absolutely! Like when you try to hang something, and it just won't stay in place. Why can't they just be tight and sturdy?",
|
| 161 |
+
"original_text": "Oh, absolutely! Like when you try to hang something, and it just won't stay in place. Why can't they just be tight and sturdy?",
|
| 162 |
+
"start_time": 3.250793650793651,
|
| 163 |
+
"end_time": 9.961360544217687,
|
| 164 |
+
"audio_file": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--123906/temp/line_1_B.wav",
|
| 165 |
+
"silence_duration": 0.5919206270367532,
|
| 166 |
+
"is_interrupted": false
|
| 167 |
+
},
|
| 168 |
+
{
|
| 169 |
+
"speaker": "A",
|
| 170 |
+
"text": "Exactly! It drives me crazy when I see a loose screw or a crooked picture frame.",
|
| 171 |
+
"original_text": "Exactly! It drives me crazy when I see a loose screw or a crooked picture frame.",
|
| 172 |
+
"start_time": 13.747669310225408,
|
| 173 |
+
"end_time": 19.123088811359196,
|
| 174 |
+
"audio_file": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--123906/temp/line_2_A.wav",
|
| 175 |
+
"silence_duration": 0.4542452739442295,
|
| 176 |
+
"is_interrupted": false
|
| 177 |
+
},
|
| 178 |
+
{
|
| 179 |
+
"speaker": "B",
|
| 180 |
+
"text": "Yeah, it's like they're just begging to be fixed. But you know, sometimes I wonder if people even notice these things or if it's just us.",
|
| 181 |
+
"original_text": "Yeah, it's like they're just begging to be fixed. But you know, sometimes I wonder if people even notice these things or if it's just us.",
|
| 182 |
+
"start_time": 19.55223911029212,
|
| 183 |
+
"end_time": 26.39051575428305,
|
| 184 |
+
"audio_file": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--123906/temp/line_3_B.wav",
|
| 185 |
+
"silence_duration": 0.4291502989329248,
|
| 186 |
+
"is_interrupted": false
|
| 187 |
+
},
|
| 188 |
+
{
|
| 189 |
+
"speaker": "A",
|
| 190 |
+
"text": "Right? And it's not like it's that hard to fix them, but people just don't bother. Or they don't have the time, or they don't know how. But it just drives me up the wall when things are out of place or not functioning properly.",
|
| 191 |
+
"original_text": "Right? And it's not like it's that hard to fix them, but people just don't bother. Or they don't have the time, or they don't know how. But it just drives me up the wall when things are out of place or not functioning properly.",
|
| 192 |
+
"start_time": 26.849330263466445,
|
| 193 |
+
"end_time": 39.388105773670524,
|
| 194 |
+
"audio_file": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--123906/temp/line_4_A.wav",
|
| 195 |
+
"silence_duration": 0.45881450918339584,
|
| 196 |
+
"is_interrupted": false
|
| 197 |
+
},
|
| 198 |
+
{
|
| 199 |
+
"speaker": "B",
|
| 200 |
+
"text": "I totally get you. There's nothing worse than trying to use something and it's just not working right because someone didn't take the time to fix it properly. Speaking of which, do you think it's just laziness, or maybe they don't have the right tools?",
|
| 201 |
+
"original_text": "I totally get you. There's nothing worse than trying to use something and it's just not working right because someone didn't take the time to fix it properly. Speaking of which, do you think it's just laziness, or maybe they don't have the right tools?",
|
| 202 |
+
"start_time": 39.82405615117125,
|
| 203 |
+
"end_time": 51.73589288586513,
|
| 204 |
+
"audio_file": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--123906/temp/line_5_B.wav",
|
| 205 |
+
"silence_duration": 0.4359503775007275,
|
| 206 |
+
"is_interrupted": false
|
| 207 |
+
},
|
| 208 |
+
{
|
| 209 |
+
"speaker": "A",
|
| 210 |
+
"text": "It could be either, honestly. But what's the point of having things if they're just going to be broken and dysfunctional?",
|
| 211 |
+
"original_text": "It could be either, honestly. But what's the point of having things if they're just going to be broken and dysfunctional?",
|
| 212 |
+
"start_time": 52.27556460896347,
|
| 213 |
+
"end_time": 58.811981842523565,
|
| 214 |
+
"audio_file": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--123906/temp/line_6_A.wav",
|
| 215 |
+
"silence_duration": 0.5396717230983441,
|
| 216 |
+
"is_interrupted": false
|
| 217 |
+
},
|
| 218 |
+
{
|
| 219 |
+
"speaker": "B",
|
| 220 |
+
"text": "Yeah, it's really frustrating. Especially when it's something you need to use regularly.",
|
| 221 |
+
"original_text": "Yeah, it's really frustrating. Especially when it's something you need to use regularly.",
|
| 222 |
+
"start_time": 59.3890926661041,
|
| 223 |
+
"end_time": 63.90537384524242,
|
| 224 |
+
"audio_file": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--123906/temp/line_7_B.wav",
|
| 225 |
+
"silence_duration": 0.5771108235805331,
|
| 226 |
+
"is_interrupted": false
|
| 227 |
+
},
|
| 228 |
+
{
|
| 229 |
+
"speaker": "A",
|
| 230 |
+
"text": "I know, I feel like I could go on about this forever. But I should probably get back to tightening this screw before it drives me even crazier.",
|
| 231 |
+
"original_text": "I know, I feel like I could go on about this forever. But I should probably get back to tightening this screw before it drives me even crazier.",
|
| 232 |
+
"start_time": 64.32218282146329,
|
| 233 |
+
"end_time": 72.3911170618261,
|
| 234 |
+
"audio_file": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--123906/temp/line_8_A.wav",
|
| 235 |
+
"silence_duration": 0.4168089762208619,
|
| 236 |
+
"is_interrupted": false
|
| 237 |
+
},
|
| 238 |
+
{
|
| 239 |
+
"speaker": "B",
|
| 240 |
+
"text": "Yeah, you probably should. Thanks for fixing that, by the way.",
|
| 241 |
+
"original_text": "Yeah, you probably should. Thanks for fixing that, by the way.",
|
| 242 |
+
"start_time": 72.7979076035892,
|
| 243 |
+
"end_time": 76.45505046073205,
|
| 244 |
+
"audio_file": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--123906/temp/line_9_B.wav",
|
| 245 |
+
"silence_duration": 0.4067905417630988,
|
| 246 |
+
"is_interrupted": false
|
| 247 |
+
},
|
| 248 |
+
{
|
| 249 |
+
"speaker": "A",
|
| 250 |
+
"text": "No problem. Just doing my part to keep things in working order around here.",
|
| 251 |
+
"original_text": "No problem. Just doing my part to keep things in working order around here.",
|
| 252 |
+
"start_time": 76.90457674986,
|
| 253 |
+
"end_time": 81.6182275435108,
|
| 254 |
+
"audio_file": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--123906/temp/line_10_A.wav",
|
| 255 |
+
"silence_duration": 0.4495262891279488,
|
| 256 |
+
"is_interrupted": false
|
| 257 |
+
}
|
| 258 |
+
]
|
| 259 |
+
},
|
| 260 |
+
"SODA_PROCESSED--train--1112763": {
|
| 261 |
+
"original_dialog_id": "",
|
| 262 |
+
"dialog_index": 1112763,
|
| 263 |
+
"processed_dialogue": "A: Hey Sarah, what's up? \nB: Not much, what are you up to? \nA: Just standing in this pool of water, trying to escape the heat. It's really hot out [interrupt] and I was thinking about just staying in here for the rest of the day because the water is perfectly refreshing and the shade from these trees makes it even more comfortable. \nB: Sorry to cut in, but how hot is it exactly? I was thinking of going for a run later, but now I'm not so sure. \nA: Yeah, it's probably not the best idea to run today. It's around 95 degrees, and it feels even hotter with the humidity. The pool feels really good and it's helping me cool down especially since there's a slight breeze too that makes the whole experience feel like a mini vacation from this brutal summer weather. \nB: That sounds nice. I wish I could be in a pool right now too. \nA: Yeah, it's pretty great. I feel like I could stay here all day without getting bored because there's so much to enjoy - watching the clouds drift by, listening to the birds chirping, and feeling the gentle movement of the water around me. \nB: I bet. It must be really nice to just relax in the water and not have to worry about anything else. \nA: Yeah, it is. It's really peaceful and calming. I'm glad I decided to come out here and do this instead of staying indoors where I would just be sweating and miserable, staring at the same four walls all afternoon. \nB: Me too, Cleveland. Me too.",
|
| 264 |
+
"clean_dialogue": "A: Hey Sarah, what's up? \nB: Not much, what are you up to? \nA:Just standing in this pool of water, trying to escape the heat. It's really hot out and I was thinking about just staying in here for the rest of the day because the water is perfectly refreshing and the shade from these trees makes it even more comfortable.\nB: Sorry to cut in, but how hot is it exactly? I was thinking of going for a run later, but now I'm not so sure. \nA: Yeah, it's probably not the best idea to run today. It's around 95 degrees, and it feels even hotter with the humidity. The pool feels really good and it's helping me cool down especially since there's a slight breeze too that makes the whole experience feel like a mini vacation from this brutal summer weather. \nB: That sounds nice. I wish I could be in a pool right now too. \nA: Yeah, it's pretty great. I feel like I could stay here all day without getting bored because there's so much to enjoy - watching the clouds drift by, listening to the birds chirping, and feeling the gentle movement of the water around me. \nB: I bet. It must be really nice to just relax in the water and not have to worry about anything else. \nA: Yeah, it is. It's really peaceful and calming. I'm glad I decided to come out here and do this instead of staying indoors where I would just be sweating and miserable, staring at the same four walls all afternoon. \nB: Me too, Cleveland. Me too.",
|
| 265 |
+
"speaker_tracks": {
|
| 266 |
+
"A": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--1112763/A_track.wav",
|
| 267 |
+
"B": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--1112763/B_track.wav"
|
| 268 |
+
},
|
| 269 |
+
"error_type": "error_after_interrupt",
|
| 270 |
+
"stereo_audio": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--1112763/stereo_dialogue.wav",
|
| 271 |
+
"total_duration": 80.37741496598639,
|
| 272 |
+
"segments": [
|
| 273 |
+
{
|
| 274 |
+
"speaker": "A",
|
| 275 |
+
"text": "Hey Sarah, what's up?",
|
| 276 |
+
"original_text": "Hey Sarah, what's up?",
|
| 277 |
+
"start_time": 0,
|
| 278 |
+
"end_time": 1.6486167800453515,
|
| 279 |
+
"audio_file": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--1112763/temp/line_0_A.wav",
|
| 280 |
+
"silence_duration": 0,
|
| 281 |
+
"is_interrupted": false
|
| 282 |
+
},
|
| 283 |
+
{
|
| 284 |
+
"speaker": "B",
|
| 285 |
+
"text": "Not much, what are you up to?",
|
| 286 |
+
"original_text": "Not much, what are you up to?",
|
| 287 |
+
"start_time": 2.244027328872719,
|
| 288 |
+
"end_time": 3.497904879893127,
|
| 289 |
+
"audio_file": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--1112763/temp/line_1_B.wav",
|
| 290 |
+
"silence_duration": 0.5954105488273673,
|
| 291 |
+
"is_interrupted": false
|
| 292 |
+
},
|
| 293 |
+
{
|
| 294 |
+
"speaker": "A",
|
| 295 |
+
"text": "Just standing in this pool of water, trying to escape the heat. It's really hot out",
|
| 296 |
+
"original_text": "Just standing in this pool of water, trying to escape the heat. It's really hot out [interrupt] and I was thinking about just staying in here for the rest of the day because the water is perfectly refreshing and the shade from these trees makes it even more comfortable.",
|
| 297 |
+
"start_time": 3.9748179407694604,
|
| 298 |
+
"end_time": 18.847198893150413,
|
| 299 |
+
"audio_file": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--1112763/temp/line_2_A.wav",
|
| 300 |
+
"silence_duration": 0.47691306087633345,
|
| 301 |
+
"is_interrupted": true,
|
| 302 |
+
"text_after_interrupt": "and I was thinking about just staying in here for the rest of the day because the water is perfectly refreshing and the shade from these trees makes it even more comfortable."
|
| 303 |
+
},
|
| 304 |
+
{
|
| 305 |
+
"speaker": "B",
|
| 306 |
+
"text": "Sorry to cut in, but how hot is it exactly? I was thinking of going for a run later, but now I'm not so sure.",
|
| 307 |
+
"original_text": "Sorry to cut in, but how hot is it exactly? I was thinking of going for a run later, but now I'm not so sure.",
|
| 308 |
+
"start_time": 9.09481794076946,
|
| 309 |
+
"end_time": 15.352595718547239,
|
| 310 |
+
"audio_file": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--1112763/temp/line_3_B.wav",
|
| 311 |
+
"silence_duration": 0.5962546040921268,
|
| 312 |
+
"is_interrupted": false
|
| 313 |
+
},
|
| 314 |
+
{
|
| 315 |
+
"speaker": "A",
|
| 316 |
+
"text": "Yeah, it's probably not the best idea to run today. It's around 95 degrees, and it feels even hotter with the humidity. The pool feels really good and it's helping me cool down especially since there's a slight breeze too that makes the whole experience feel like a mini vacation from this brutal summer weather.",
|
| 317 |
+
"original_text": "Yeah, it's probably not the best idea to run today. It's around 95 degrees, and it feels even hotter with the humidity. The pool feels really good and it's helping me cool down especially since there's a slight breeze too that makes the whole experience feel like a mini vacation from this brutal summer weather.",
|
| 318 |
+
"start_time": 19.411937689342565,
|
| 319 |
+
"end_time": 39.3462687551022,
|
| 320 |
+
"audio_file": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--1112763/temp/line_4_A.wav",
|
| 321 |
+
"silence_duration": 0.5647387961921536,
|
| 322 |
+
"is_interrupted": false
|
| 323 |
+
},
|
| 324 |
+
{
|
| 325 |
+
"speaker": "B",
|
| 326 |
+
"text": "That sounds nice. I wish I could be in a pool right now too.",
|
| 327 |
+
"original_text": "That sounds nice. I wish I could be in a pool right now too.",
|
| 328 |
+
"start_time": 39.7352726618639,
|
| 329 |
+
"end_time": 42.986066312657556,
|
| 330 |
+
"audio_file": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--1112763/temp/line_5_B.wav",
|
| 331 |
+
"silence_duration": 0.3890039067616998,
|
| 332 |
+
"is_interrupted": false
|
| 333 |
+
},
|
| 334 |
+
{
|
| 335 |
+
"speaker": "A",
|
| 336 |
+
"text": "Yeah, it's pretty great. I feel like I could stay here all day without getting bored because there's so much to enjoy - watching the clouds drift by, listening to the birds chirping, and feeling the gentle movement of the water around me.",
|
| 337 |
+
"original_text": "Yeah, it's pretty great. I feel like I could stay here all day without getting bored because there's so much to enjoy - watching the clouds drift by, listening to the birds chirping, and feeling the gentle movement of the water around me.",
|
| 338 |
+
"start_time": 43.57081570786076,
|
| 339 |
+
"end_time": 58.803105957293866,
|
| 340 |
+
"audio_file": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--1112763/temp/line_6_A.wav",
|
| 341 |
+
"silence_duration": 0.5847493952032004,
|
| 342 |
+
"is_interrupted": false
|
| 343 |
+
},
|
| 344 |
+
{
|
| 345 |
+
"speaker": "B",
|
| 346 |
+
"text": "I bet. It must be really nice to just relax in the water and not have to worry about anything else.",
|
| 347 |
+
"original_text": "I bet. It must be really nice to just relax in the water and not have to worry about anything else.",
|
| 348 |
+
"start_time": 59.111149637610815,
|
| 349 |
+
"end_time": 63.94090020450424,
|
| 350 |
+
"audio_file": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--1112763/temp/line_7_B.wav",
|
| 351 |
+
"silence_duration": 0.3080436803169506,
|
| 352 |
+
"is_interrupted": false
|
| 353 |
+
},
|
| 354 |
+
{
|
| 355 |
+
"speaker": "A",
|
| 356 |
+
"text": "Yeah, it is. It's really peaceful and calming. I'm glad I decided to come out here and do this instead of staying indoors where I would just be sweating and miserable, staring at the same four walls all afternoon.",
|
| 357 |
+
"original_text": "Yeah, it is. It's really peaceful and calming. I'm glad I decided to come out here and do this instead of staying indoors where I would just be sweating and miserable, staring at the same four walls all afternoon.",
|
| 358 |
+
"start_time": 64.4324265657298,
|
| 359 |
+
"end_time": 78.10897985371166,
|
| 360 |
+
"audio_file": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--1112763/temp/line_8_A.wav",
|
| 361 |
+
"silence_duration": 0.4915263612255627,
|
| 362 |
+
"is_interrupted": false
|
| 363 |
+
},
|
| 364 |
+
{
|
| 365 |
+
"speaker": "B",
|
| 366 |
+
"text": "Me too, Cleveland. Me too.",
|
| 367 |
+
"original_text": "Me too, Cleveland. Me too.",
|
| 368 |
+
"start_time": 78.5430769468726,
|
| 369 |
+
"end_time": 80.37745336410616,
|
| 370 |
+
"audio_file": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--1112763/temp/line_9_B.wav",
|
| 371 |
+
"silence_duration": 0.4340970931609471,
|
| 372 |
+
"is_interrupted": false
|
| 373 |
+
}
|
| 374 |
+
]
|
| 375 |
+
},
|
| 376 |
+
"SODA_PROCESSED--train--822773": {
|
| 377 |
+
"original_text": "A: Hey, Sarah! I'm so glad to see you!\nB: Hey, Renesmee! I'm happy to see you too! How are you doing today?\nA: I'm doing great today! I love exploring and playing games. Do you want to play a game of fetch with [interrupt] me in the park later? We could bring some tennis balls and maybe even invite some other friends to join us for an afternoon of fun activities.\nB: Oh, fetch sounds fun! But I was wondering, do you have any other favorite games we could try later?\nA: That sounds like fun! I would love to play a game of fetch with you. Maybe after that, we can try some other games too. Alright, let's go!\nB: Absolutely, let's start with fetch and then we can see what else we feel like doing!\nA: Renesmee and Sarah seem to be having a great time playing fetch together. Sarah is laughing and Renesmee looks like he's having a blast running around.",
|
| 378 |
+
"cleaned_text": "A: Hey, Sarah! I'm so glad to see you!\nB: Hey, Renesmee! I'm happy to see you too! How are you doing today?\nA:I'm doing great today! I love exploring and playing games. Do you want to play a game of fetch with me in the park later? We could bring some tennis balls and maybe even invite some other friends to join us for an afternoon of fun activities.\nB: Oh, fetch sounds fun! But I was wondering, do you have any other favorite games we could try later?\nA: That sounds like fun! I would love to play a game of fetch with you. Maybe after that, we can try some other games too. Alright, let's go!\nB: Absolutely, let's start with fetch and then we can see what else we feel like doing!\nA: Renesmee and Sarah seem to be having a great time playing fetch together. Sarah is laughing and Renesmee looks like he's having a blast running around.",
|
| 379 |
+
"total_duration": 45.90185941043084,
|
| 380 |
+
"stereo_audio": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--822773/stereo_dialogue.wav",
|
| 381 |
+
"speaker_tracks": {
|
| 382 |
+
"A": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--822773/A_track.wav",
|
| 383 |
+
"B": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--822773/B_track.wav"
|
| 384 |
+
},
|
| 385 |
+
"error_type": "error_after_interrupt",
|
| 386 |
+
"segments": [
|
| 387 |
+
{
|
| 388 |
+
"speaker": "A",
|
| 389 |
+
"text": "Hey, Sarah! I'm so glad to see you!",
|
| 390 |
+
"original_text": "Hey, Sarah! I'm so glad to see you!",
|
| 391 |
+
"start_time": 0,
|
| 392 |
+
"end_time": 2.7747845804988662,
|
| 393 |
+
"audio_file": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--822773/temp/line_0_A.wav",
|
| 394 |
+
"silence_duration": 0,
|
| 395 |
+
"is_interrupted": false
|
| 396 |
+
},
|
| 397 |
+
{
|
| 398 |
+
"speaker": "B",
|
| 399 |
+
"text": "Hey, Renesmee! I'm happy to see you too! How are you doing today?",
|
| 400 |
+
"original_text": "Hey, Renesmee! I'm happy to see you too! How are you doing today?",
|
| 401 |
+
"start_time": 3.1698846088737547,
|
| 402 |
+
"end_time": 7.268206604338607,
|
| 403 |
+
"audio_file": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--822773/temp/line_1_B.wav",
|
| 404 |
+
"silence_duration": 0.39510002837488867,
|
| 405 |
+
"is_interrupted": false
|
| 406 |
+
},
|
| 407 |
+
{
|
| 408 |
+
"speaker": "A",
|
| 409 |
+
"text": "I'm doing great today! I love exploring and playing games. Do you want to play a game of fetch with",
|
| 410 |
+
"original_text": "I'm doing great today! I love exploring and playing games. Do you want to play a game of fetch with [interrupt] me in the park later? We could bring some tennis balls and maybe even invite some other friends to join us for an afternoon of fun activities.",
|
| 411 |
+
"start_time": 7.859956363848243,
|
| 412 |
+
"end_time": 22.209888336637356,
|
| 413 |
+
"audio_file": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--822773/temp/line_2_A.wav",
|
| 414 |
+
"silence_duration": 0.5917497595096354,
|
| 415 |
+
"is_interrupted": true,
|
| 416 |
+
"text_after_interrupt": "me in the park later? We could bring some tennis balls and maybe even invite some other friends to join us for an afternoon of fun activities."
|
| 417 |
+
},
|
| 418 |
+
{
|
| 419 |
+
"speaker": "B",
|
| 420 |
+
"text": "Oh, fetch sounds fun! But I was wondering, do you have any other favorite games we could try later?",
|
| 421 |
+
"original_text": "Oh, fetch sounds fun! But I was wondering, do you have any other favorite games we could try later?",
|
| 422 |
+
"start_time": 14.558913279948015,
|
| 423 |
+
"end_time": 19.574423484029648,
|
| 424 |
+
"audio_file": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--822773/temp/line_3_B.wav",
|
| 425 |
+
"silence_duration": 0.35472772157833465,
|
| 426 |
+
"is_interrupted": false
|
| 427 |
+
},
|
| 428 |
+
{
|
| 429 |
+
"speaker": "A",
|
| 430 |
+
"text": "That sounds like fun! I would love to play a game of fetch with you. Maybe after that, we can try some other games too. Alright, let's go!",
|
| 431 |
+
"original_text": "That sounds like fun! I would love to play a game of fetch with you. Maybe after that, we can try some other games too. Alright, let's go!",
|
| 432 |
+
"start_time": 22.580509090185526,
|
| 433 |
+
"end_time": 31.798831085650377,
|
| 434 |
+
"audio_file": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--822773/temp/line_4_A.wav",
|
| 435 |
+
"silence_duration": 0.3706207535481718,
|
| 436 |
+
"is_interrupted": false
|
| 437 |
+
},
|
| 438 |
+
{
|
| 439 |
+
"speaker": "B",
|
| 440 |
+
"text": "Absolutely, let's start with fetch and then we can see what else we feel like doing!",
|
| 441 |
+
"original_text": "Absolutely, let's start with fetch and then we can see what else we feel like doing!",
|
| 442 |
+
"start_time": 32.37886103861385,
|
| 443 |
+
"end_time": 36.53523292070002,
|
| 444 |
+
"audio_file": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--822773/temp/line_5_B.wav",
|
| 445 |
+
"silence_duration": 0.5800299529634694,
|
| 446 |
+
"is_interrupted": false
|
| 447 |
+
},
|
| 448 |
+
{
|
| 449 |
+
"speaker": "A",
|
| 450 |
+
"text": "Renesmee and Sarah seem to be having a great time playing fetch together. Sarah is laughing and Renesmee looks like he's having a blast running around.",
|
| 451 |
+
"original_text": "Renesmee and Sarah seem to be having a great time playing fetch together. Sarah is laughing and Renesmee looks like he's having a blast running around.",
|
| 452 |
+
"start_time": 36.96218842583218,
|
| 453 |
+
"end_time": 45.90187096551472,
|
| 454 |
+
"audio_file": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--822773/temp/line_6_A.wav",
|
| 455 |
+
"silence_duration": 0.4269555051321636,
|
| 456 |
+
"is_interrupted": false
|
| 457 |
+
}
|
| 458 |
+
]
|
| 459 |
+
},
|
| 460 |
+
"SODA_PROCESSED--train--424960": {
|
| 461 |
+
"original_dialog_id": "",
|
| 462 |
+
"dialog_index": 424960,
|
| 463 |
+
"processed_dialogue": "A: So, you've traveled a lot? \nB: Yes, I have. I've been to all sorts of interesting places, from tropical islands to snowy mountain ranges. \nA: Sorry to interrupt, but when you say \"all sorts of places,\" do you mean different countries or also different kinds of environments, like cities versus rural areas? \nB: Both, actually. I've visited various countries and experienced everything from bustling cities to remote villages. It's definitely a great experience. You get to see so many different cultures and meet new [interrupt] people, each with their own unique stories and perspectives that can completely change how you view the world and your place in it. \nA: Speaking of cultures, which one left the biggest impression on you? \nB: That's a tough question, but I think visiting Japan had the most profound impact on me. The blend of tradition and modernity there is just fascinating. But I haven't even mentioned how meeting new people really enhances the whole travel experience. \nA: It must be wonderful to be able to see the world like that. \nB: It is. I highly recommend it if you ever get the chance. \nA: I'm definitely going to try to make it happen. Thank you for talking with me about it.",
|
| 464 |
+
"clean_dialogue": "A: So, you've traveled a lot? \nB: Yes, I have. I've been to all sorts of interesting places, from tropical islands to snowy mountain ranges. \nA: Sorry to interrupt, but when you say \"all sorts of places,\" do you mean different countries or also different kinds of environments, like cities versus rural areas? \nB:Both, actually. I've visited various countries and experienced everything from bustling cities to remote villages. It's definitely a great experience. You get to see so many different cultures and meet new people, each with their own unique stories and perspectives that can completely change how you view the world and your place in it.\nA: Speaking of cultures, which one left the biggest impression on you? \nB: That's a tough question, but I think visiting Japan had the most profound impact on me. The blend of tradition and modernity there is just fascinating. But I haven't even mentioned how meeting new people really enhances the whole travel experience. \nA: It must be wonderful to be able to see the world like that. \nB: It is. I highly recommend it if you ever get the chance. \nA: I'm definitely going to try to make it happen. Thank you for talking with me about it.",
|
| 465 |
+
"speaker_tracks": {
|
| 466 |
+
"A": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--424960/A_track.wav",
|
| 467 |
+
"B": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--424960/B_track.wav"
|
| 468 |
+
},
|
| 469 |
+
"error_type": "error_after_interrupt",
|
| 470 |
+
"stereo_audio": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--424960/stereo_dialogue.wav",
|
| 471 |
+
"total_duration": 61.32780045351474,
|
| 472 |
+
"segments": [
|
| 473 |
+
{
|
| 474 |
+
"speaker": "A",
|
| 475 |
+
"text": "So, you've traveled a lot?",
|
| 476 |
+
"original_text": "So, you've traveled a lot?",
|
| 477 |
+
"start_time": 0,
|
| 478 |
+
"end_time": 1.474467120181406,
|
| 479 |
+
"audio_file": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--424960/temp/line_0_A.wav",
|
| 480 |
+
"silence_duration": 0,
|
| 481 |
+
"is_interrupted": false
|
| 482 |
+
},
|
| 483 |
+
{
|
| 484 |
+
"speaker": "B",
|
| 485 |
+
"text": "Yes, I have. I've been to all sorts of interesting places, from tropical islands to snowy mountain ranges.",
|
| 486 |
+
"original_text": "Yes, I have. I've been to all sorts of interesting places, from tropical islands to snowy mountain ranges.",
|
| 487 |
+
"start_time": 2.068172914767877,
|
| 488 |
+
"end_time": 7.826721667602344,
|
| 489 |
+
"audio_file": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--424960/temp/line_1_B.wav",
|
| 490 |
+
"silence_duration": 0.5937057945864714,
|
| 491 |
+
"is_interrupted": false
|
| 492 |
+
},
|
| 493 |
+
{
|
| 494 |
+
"speaker": "A",
|
| 495 |
+
"text": "Sorry to interrupt, but when you say \"all sorts of places,\" do you mean different countries or also different kinds of environments, like cities versus rural areas?",
|
| 496 |
+
"original_text": "Sorry to interrupt, but when you say \"all sorts of places,\" do you mean different countries or also different kinds of environments, like cities versus rural areas?",
|
| 497 |
+
"start_time": 8.340391189370486,
|
| 498 |
+
"end_time": 18.67327100796459,
|
| 499 |
+
"audio_file": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--424960/temp/line_2_A.wav",
|
| 500 |
+
"silence_duration": 0.5136695217681417,
|
| 501 |
+
"is_interrupted": false
|
| 502 |
+
},
|
| 503 |
+
{
|
| 504 |
+
"speaker": "B",
|
| 505 |
+
"text": "Both, actually. I've visited various countries and experienced everything from bustling cities to remote villages. It's definitely a great experience. You get to see so many different cultures and meet new",
|
| 506 |
+
"original_text": "Both, actually. I've visited various countries and experienced everything from bustling cities to remote villages. It's definitely a great experience. You get to see so many different cultures and meet new [interrupt] people, each with their own unique stories and perspectives that can completely change how you view the world and your place in it.",
|
| 507 |
+
"start_time": 19.06107440491716,
|
| 508 |
+
"end_time": 36.000031321016934,
|
| 509 |
+
"audio_file": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--424960/temp/line_3_B.wav",
|
| 510 |
+
"silence_duration": 0.38780339695257027,
|
| 511 |
+
"is_interrupted": true,
|
| 512 |
+
"text_after_interrupt": "people, each with their own unique stories and perspectives that can completely change how you view the world and your place in it."
|
| 513 |
+
},
|
| 514 |
+
{
|
| 515 |
+
"speaker": "A",
|
| 516 |
+
"text": "Speaking of cultures, which one left the biggest impression on you?",
|
| 517 |
+
"original_text": "Speaking of cultures, which one left the biggest impression on you?",
|
| 518 |
+
"start_time": 29.660983701969315,
|
| 519 |
+
"end_time": 33.329736536436435,
|
| 520 |
+
"audio_file": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--424960/temp/line_4_A.wav",
|
| 521 |
+
"silence_duration": 0.5959806021294678,
|
| 522 |
+
"is_interrupted": false
|
| 523 |
+
},
|
| 524 |
+
{
|
| 525 |
+
"speaker": "B",
|
| 526 |
+
"text": "That's a tough question, but I think visiting Japan had the most profound impact on me. The blend of tradition and modernity there is just fascinating. But I haven't even mentioned how meeting new people really enhances the whole travel experience.",
|
| 527 |
+
"original_text": "That's a tough question, but I think visiting Japan had the most profound impact on me. The blend of tradition and modernity there is just fascinating. But I haven't even mentioned how meeting new people really enhances the whole travel experience.",
|
| 528 |
+
"start_time": 36.35174674763023,
|
| 529 |
+
"end_time": 48.86730230318579,
|
| 530 |
+
"audio_file": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--424960/temp/line_5_B.wav",
|
| 531 |
+
"silence_duration": 0.3517154266132969,
|
| 532 |
+
"is_interrupted": false
|
| 533 |
+
},
|
| 534 |
+
{
|
| 535 |
+
"speaker": "A",
|
| 536 |
+
"text": "It must be wonderful to be able to see the world like that.",
|
| 537 |
+
"original_text": "It must be wonderful to be able to see the world like that.",
|
| 538 |
+
"start_time": 49.35988001043792,
|
| 539 |
+
"end_time": 52.57584372925878,
|
| 540 |
+
"audio_file": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--424960/temp/line_6_A.wav",
|
| 541 |
+
"silence_duration": 0.49257770725213246,
|
| 542 |
+
"is_interrupted": false
|
| 543 |
+
},
|
| 544 |
+
{
|
| 545 |
+
"speaker": "B",
|
| 546 |
+
"text": "It is. I highly recommend it if you ever get the chance.",
|
| 547 |
+
"original_text": "It is. I highly recommend it if you ever get the chance.",
|
| 548 |
+
"start_time": 53.04658029347679,
|
| 549 |
+
"end_time": 56.18127417102781,
|
| 550 |
+
"audio_file": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--424960/temp/line_7_B.wav",
|
| 551 |
+
"silence_duration": 0.4707365642180112,
|
| 552 |
+
"is_interrupted": false
|
| 553 |
+
},
|
| 554 |
+
{
|
| 555 |
+
"speaker": "A",
|
| 556 |
+
"text": "I'm definitely going to try to make it happen. Thank you for talking with me about it.",
|
| 557 |
+
"original_text": "I'm definitely going to try to make it happen. Thank you for talking with me about it.",
|
| 558 |
+
"start_time": 56.57932539417633,
|
| 559 |
+
"end_time": 61.32780611979991,
|
| 560 |
+
"audio_file": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--424960/temp/line_8_A.wav",
|
| 561 |
+
"silence_duration": 0.39805122314851726,
|
| 562 |
+
"is_interrupted": false
|
| 563 |
+
}
|
| 564 |
+
]
|
| 565 |
+
}
|
| 566 |
+
}
|
ms-swift/swift/llm/dataset/__init__.py
ADDED
|
@@ -0,0 +1,35 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) Alibaba, Inc. and its affiliates.
|
| 2 |
+
import inspect
|
| 3 |
+
|
| 4 |
+
import datasets.fingerprint
|
| 5 |
+
from datasets import Dataset as HfDataset
|
| 6 |
+
|
| 7 |
+
from ..utils import get_temporary_cache_files_directory
|
| 8 |
+
from . import dataset
|
| 9 |
+
from .loader import DATASET_TYPE, load_dataset
|
| 10 |
+
from .media import MediaResource
|
| 11 |
+
from .preprocessor import (AlpacaPreprocessor, AutoPreprocessor, MessagesPreprocessor, ResponsePreprocessor,
|
| 12 |
+
RowPreprocessor)
|
| 13 |
+
from .register import DATASET_MAPPING, DatasetMeta, SubsetDataset, register_dataset, register_dataset_info
|
| 14 |
+
from .utils import (EncodePreprocessor, GetLengthPreprocessor, IterablePackingDataset, LazyLLMDataset, PackingDataset,
|
| 15 |
+
sample_dataset)
|
| 16 |
+
|
| 17 |
+
# Keep a handle on the library's original implementation; the `update_fingerprint`
# wrapper defined in this module delegates to it after augmenting `transform_args`.
update_fingerprint_origin = datasets.fingerprint.update_fingerprint
|
| 18 |
+
|
| 19 |
+
|
| 20 |
+
def update_fingerprint(fingerprint, transform, transform_args):
    """Compute a dataset fingerprint that also depends on the transform's source code.

    When ``transform_args`` carries a ``'function'`` entry, that entry is replaced by a
    ``(function, source_code)`` tuple before delegating to the original
    ``datasets.fingerprint.update_fingerprint``. For objects exposing ``__self__``
    (e.g. bound methods) the source of the owning class is used instead of the
    function's own source.

    Args:
        fingerprint: The current fingerprint, forwarded unchanged.
        transform: The transform identifier, forwarded unchanged.
        transform_args: Mapping of transform arguments. It is not modified; a shallow
            copy is made when the ``'function'`` entry has to be replaced.

    Returns:
        Whatever ``update_fingerprint_origin`` returns for the (possibly augmented) arguments.
    """
    if 'function' in transform_args:
        function = transform_args['function']
        # Calculate the hash using the source code.
        target = function.__self__.__class__ if hasattr(function, '__self__') else function
        try:
            source = inspect.getsource(target)
        except (OSError, TypeError):
            # `inspect.getsource` raises TypeError for builtins / callable instances and
            # OSError when the source cannot be retrieved. Fall back to the library's
            # default hashing of the callable instead of failing the whole transform.
            source = None
        if source is not None:
            # Copy so the caller's dict is left untouched.
            transform_args = {**transform_args, 'function': (function, source)}
    return update_fingerprint_origin(fingerprint, transform, transform_args)
|
| 29 |
+
|
| 30 |
+
|
| 31 |
+
# Monkey-patch `datasets` at import time: the statements starting here replace
# `update_fingerprint` and `get_temporary_cache_files_directory` in both
# `datasets.fingerprint` and `datasets.arrow_dataset` with the versions from this package.
# NOTE(review): both modules are patched presumably because each holds its own reference
# to these names -- confirm against the installed `datasets` version.
datasets.fingerprint.update_fingerprint = update_fingerprint
|
| 32 |
+
datasets.arrow_dataset.update_fingerprint = update_fingerprint
|
| 33 |
+
datasets.fingerprint.get_temporary_cache_files_directory = get_temporary_cache_files_directory
|
| 34 |
+
datasets.arrow_dataset.get_temporary_cache_files_directory = get_temporary_cache_files_directory
|
| 35 |
+
register_dataset_info()
|
ms-swift/swift/llm/dataset/__pycache__/loader.cpython-310.pyc
ADDED
|
Binary file (15.9 kB). View file
|
|
|
ms-swift/swift/llm/dataset/__pycache__/media.cpython-310.pyc
ADDED
|
Binary file (5 kB). View file
|
|
|
ms-swift/swift/llm/dataset/dataset/__init__.py
ADDED
|
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) Alibaba, Inc. and its affiliates.
|
| 2 |
+
from . import llm, mllm
|
ms-swift/swift/llm/dataset/dataset/__pycache__/llm.cpython-310.pyc
ADDED
|
Binary file (23.9 kB). View file
|
|
|
ms-swift/swift/llm/dataset/preprocessor/extra.py
ADDED
|
@@ -0,0 +1,112 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) Alibaba, Inc. and its affiliates.
|
| 2 |
+
from typing import Any, Dict, List, Optional
|
| 3 |
+
|
| 4 |
+
import numpy as np
|
| 5 |
+
|
| 6 |
+
from .core import ResponsePreprocessor
|
| 7 |
+
|
| 8 |
+
|
| 9 |
+
class GroundingMixin:
    """This class offers prompts to the grounding task.

    Classes using this mixin set ``task_type`` to one of the keys of
    ``_grounding_prompts`` (``'grounding'`` or ``'caption'``) and call
    ``construct_grounding_prompt`` to draw a random ``(query, response)`` template.
    """
    # Key into `_grounding_prompts`; must be set before `construct_grounding_prompt` is called.
    task_type: Optional[str] = None

    # Sampling probabilities for the prompt language, in the order ('en', 'zh').
    _grounding_language_mixin = [0.8, 0.2]
    # task type -> language -> list of (query template, response template) pairs.
    _grounding_prompts = {
        'grounding': {
            'en': [('<ref-object>', '<bbox>'), ('The positions of <ref-object> is', '<bbox>'),
                   ('Find the positions of <ref-object>', '<bbox>'), ('Where is <ref-object>', '<bbox>'),
                   ('Find <ref-object>', '<bbox>'), ('Show me <ref-object>', '<bbox>'),
                   ('Detect <ref-object>', '<bbox>'), ('Locate <ref-object>', '<bbox>'),
                   ('Tell me the location of <ref-object>', '<bbox>'), ('Give the location of <ref-object>', '<bbox>'),
                   ('Provide the bounding box coordinate of <ref-object>', '<bbox>')],
            'zh': [('<ref-object>', '<bbox>'), ('<ref-object>的位置在图片中', '<bbox>'), ('<ref-object>在图片中', '<bbox>'),
                   ('<ref-object>在', '<bbox>'), ('找到<ref-object>的位置', '<bbox>'), ('<ref-object>在哪里', '<bbox>'),
                   ('提供<ref-object>的坐标位置', '<bbox>')]
        },
        'caption': {
            'en': [
                ('<bbox>', '<ref-object>'),
                ('The object at position <bbox>', '<ref-object>'),
                ('This <bbox> is', '<ref-object>'),
                ('What is the object at <bbox>', '<ref-object>'),
                ('Describe <bbox>', '<ref-object>'),
                ('<bbox> is', '<ref-object>'),
                ('The bounding box coordinate <bbox> contains', '<ref-object>'),
            ],
            'zh': [
                ('<bbox>', '<ref-object>'),
                ('<bbox>是什么', '<ref-object>'),
                ('<bbox>的位置包含', '<ref-object>'),
                ('描述<bbox>', '<ref-object>'),
                ('<bbox>中是', '<ref-object>'),
                ('坐标<bbox>描述了什么', '<ref-object>'),
                ('描述<bbox>中的事物', '<ref-object>'),
            ]
        },
    }

    def construct_grounding_prompt(self):
        """Randomly pick a prompt template for the configured task.

        The language is drawn from ``('en', 'zh')`` using the probabilities in
        ``_grounding_language_mixin`` (overridable per subclass or instance); a template is
        then drawn uniformly from that language's list.

        Returns:
            A ``(query, response)`` tuple of template strings.

        Raises:
            KeyError: If ``task_type`` is not a key of ``_grounding_prompts``.
        """
        # TODO Only support one bbox to one object
        # Use the declared class attribute rather than a duplicated literal so overrides take effect.
        lang = np.random.choice(['en', 'zh'], p=self._grounding_language_mixin)
        prompts = GroundingMixin._grounding_prompts[self.task_type][lang]
        query, response = prompts[np.random.choice(range(len(prompts)))]
        return query, response
|
| 54 |
+
|
| 55 |
+
|
| 56 |
+
class TextGenerationPreprocessor(ResponsePreprocessor):
    """Response preprocessor that embeds each row's query into a fixed prompt template."""

    def __init__(self,
                 *,
                 prompt: str,
                 query_tag: str = '{{QUERY}}',
                 columns: Optional[Dict[str, str]] = None,
                 **kwargs) -> None:
        """Store the template and its placeholder, then initialise the base preprocessor.

        Args:
            prompt: Template text containing ``query_tag`` where the query should go.
            query_tag: Placeholder substring in ``prompt`` that is substituted by the query.
            columns: Forwarded to the base class as ``columns``.
            **kwargs: Forwarded to the base class unchanged.
        """
        self.prompt = prompt
        self.query_tag = query_tag
        super().__init__(columns=columns, **kwargs)

    def preprocess(self, row: Dict[str, Any]) -> Dict[str, Any]:
        """Substitute the row's query into the template, then defer to the base class."""
        original_query = row['query']
        templated_query = self.prompt.replace(self.query_tag, original_query)
        row['query'] = templated_query
        return super().preprocess(row)
|
| 71 |
+
|
| 72 |
+
|
| 73 |
+
class ClsGenerationPreprocessor(ResponsePreprocessor):
    """Turn a classification row into a generation-style ``query``/``response`` pair.

    The query is built from a fixed template listing the task, the input sentence(s) and
    the candidate categories; the response is the label name looked up in ``labels``.
    """

    def __init__(self,
                 labels: List[str],
                 *,
                 task: str,
                 is_pair_seq: bool = False,
                 columns: Optional[Dict[str, str]] = None,
                 **kwargs) -> None:
        """Build the prompt template and initialise the base preprocessor.

        Args:
            labels: Label names; a row's integer label indexes into this list.
            task: Task description placed on the ``Task:`` line of the prompt.
            is_pair_seq: If true, rows provide ``sentence1``/``sentence2``; otherwise ``sentence``.
            columns: Forwarded to the base class as ``columns``.
            **kwargs: Forwarded to the base class unchanged.
        """
        self.labels = labels
        self.task = task
        self.is_pair_seq = is_pair_seq

        self.sentence2_key = 'sentence2'
        self.label_key = 'label'
        if is_pair_seq:
            self.sentence_key = 'sentence1'
            inputs = 'Sentence1: {sentence1}\nSentence2: {sentence2}'
        else:
            self.sentence_key = 'sentence'
            inputs = 'Sentence: {sentence}'
        # `self.prompt` is later fed to `str.format`, so literal braces coming from the task
        # description or the label names must be doubled; otherwise they would be parsed as
        # replacement fields. The formatted output is unchanged for brace-free inputs.
        task = self._escape_braces(task)
        category = self._escape_braces(', '.join(labels))
        self.prompt = f"""Task: {task}
{inputs}
Category: {category}
Output:"""
        super().__init__(columns=columns, **kwargs)

    @staticmethod
    def _escape_braces(text: str) -> str:
        """Double ``{`` and ``}`` so ``str.format`` emits them literally."""
        return text.replace('{', '{{').replace('}', '}}')

    def preprocess(self, row: Dict[str, Any]) -> Optional[Dict[str, Any]]:
        """Build ``query`` and ``response`` for one row.

        Returns:
            The result of the base class ``preprocess``, or ``None`` when the row has no
            usable label (missing, ``None``, or negative).

        Raises:
            IndexError: If the label is not smaller than ``len(labels)``.
        """
        label = row.pop(self.label_key, None)
        if label is None:
            return

        label_index = int(label)
        if label_index < 0:
            # A negative label would silently wrap around to the end of `labels` and yield a
            # wrong target; treat it like a missing label instead.
            # NOTE(review): assumes negative values (e.g. -1) mark unlabeled rows -- confirm for the datasets in use.
            return

        if self.is_pair_seq:
            query = self.prompt.format(sentence1=row.pop(self.sentence_key), sentence2=row.pop(self.sentence2_key))
        else:
            query = self.prompt.format(sentence=row.pop(self.sentence_key))
        row['query'] = query
        row['response'] = self.labels[label_index]
        return super().preprocess(row)
|
ms-swift/swift/llm/ds_config/zero2.json
ADDED
|
@@ -0,0 +1,35 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"fp16": {
|
| 3 |
+
"enabled": "auto",
|
| 4 |
+
"loss_scale": 0,
|
| 5 |
+
"loss_scale_window": 1000,
|
| 6 |
+
"initial_scale_power": 16,
|
| 7 |
+
"hysteresis": 2,
|
| 8 |
+
"min_loss_scale": 1
|
| 9 |
+
},
|
| 10 |
+
|
| 11 |
+
"bf16": {
|
| 12 |
+
"enabled": "auto"
|
| 13 |
+
},
|
| 14 |
+
|
| 15 |
+
"zero_optimization": {
|
| 16 |
+
"stage": 2,
|
| 17 |
+
"offload_optimizer": {
|
| 18 |
+
"device": "none",
|
| 19 |
+
"pin_memory": true
|
| 20 |
+
},
|
| 21 |
+
"allgather_partitions": true,
|
| 22 |
+
"allgather_bucket_size": 2e8,
|
| 23 |
+
"overlap_comm": false,
|
| 24 |
+
"reduce_scatter": true,
|
| 25 |
+
"reduce_bucket_size": 2e8,
|
| 26 |
+
"contiguous_gradients": true
|
| 27 |
+
},
|
| 28 |
+
|
| 29 |
+
"gradient_accumulation_steps": "auto",
|
| 30 |
+
"gradient_clipping": "auto",
|
| 31 |
+
"steps_per_print": 2000,
|
| 32 |
+
"train_batch_size": "auto",
|
| 33 |
+
"train_micro_batch_size_per_gpu": "auto",
|
| 34 |
+
"wall_clock_breakdown": false
|
| 35 |
+
}
|
ms-swift/swift/llm/ds_config/zero3.json
ADDED
|
@@ -0,0 +1,44 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"fp16": {
|
| 3 |
+
"enabled": "auto",
|
| 4 |
+
"loss_scale": 0,
|
| 5 |
+
"loss_scale_window": 1000,
|
| 6 |
+
"initial_scale_power": 16,
|
| 7 |
+
"hysteresis": 2,
|
| 8 |
+
"min_loss_scale": 1
|
| 9 |
+
},
|
| 10 |
+
|
| 11 |
+
"bf16": {
|
| 12 |
+
"enabled": "auto"
|
| 13 |
+
},
|
| 14 |
+
|
| 15 |
+
"zero_optimization": {
|
| 16 |
+
"stage": 3,
|
| 17 |
+
"offload_optimizer": {
|
| 18 |
+
"device": "none",
|
| 19 |
+
"pin_memory": true
|
| 20 |
+
},
|
| 21 |
+
"offload_param": {
|
| 22 |
+
"device": "none",
|
| 23 |
+
"pin_memory": true
|
| 24 |
+
},
|
| 25 |
+
"overlap_comm": false,
|
| 26 |
+
"contiguous_gradients": true,
|
| 27 |
+
"sub_group_size": 1e9,
|
| 28 |
+
"reduce_bucket_size": "auto",
|
| 29 |
+
"zero_quantized_weights": false,
|
| 30 |
+
"zero_quantized_gradients": false,
|
| 31 |
+
"stage3_prefetch_bucket_size": "auto",
|
| 32 |
+
"stage3_param_persistence_threshold": "auto",
|
| 33 |
+
"stage3_max_live_parameters": 1e9,
|
| 34 |
+
"stage3_max_reuse_distance": 1e9,
|
| 35 |
+
"stage3_gather_16bit_weights_on_model_save": true
|
| 36 |
+
},
|
| 37 |
+
|
| 38 |
+
"gradient_accumulation_steps": "auto",
|
| 39 |
+
"gradient_clipping": "auto",
|
| 40 |
+
"steps_per_print": 2000,
|
| 41 |
+
"train_batch_size": "auto",
|
| 42 |
+
"train_micro_batch_size_per_gpu": "auto",
|
| 43 |
+
"wall_clock_breakdown": false
|
| 44 |
+
}
|
ms-swift/swift/llm/ds_config/zero3_offload.json
ADDED
|
@@ -0,0 +1,42 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"fp16": {
|
| 3 |
+
"enabled": "auto",
|
| 4 |
+
"loss_scale": 0,
|
| 5 |
+
"loss_scale_window": 1000,
|
| 6 |
+
"initial_scale_power": 16,
|
| 7 |
+
"hysteresis": 2,
|
| 8 |
+
"min_loss_scale": 1
|
| 9 |
+
},
|
| 10 |
+
|
| 11 |
+
"bf16": {
|
| 12 |
+
"enabled": "auto"
|
| 13 |
+
},
|
| 14 |
+
|
| 15 |
+
"zero_optimization": {
|
| 16 |
+
"stage": 3,
|
| 17 |
+
"offload_optimizer": {
|
| 18 |
+
"device": "cpu",
|
| 19 |
+
"pin_memory": true
|
| 20 |
+
},
|
| 21 |
+
"offload_param": {
|
| 22 |
+
"device": "cpu",
|
| 23 |
+
"pin_memory": true
|
| 24 |
+
},
|
| 25 |
+
"overlap_comm": false,
|
| 26 |
+
"contiguous_gradients": true,
|
| 27 |
+
"sub_group_size": 1e9,
|
| 28 |
+
"reduce_bucket_size": "auto",
|
| 29 |
+
"stage3_prefetch_bucket_size": "auto",
|
| 30 |
+
"stage3_param_persistence_threshold": "auto",
|
| 31 |
+
"stage3_max_live_parameters": 1e9,
|
| 32 |
+
"stage3_max_reuse_distance": 1e9,
|
| 33 |
+
"stage3_gather_16bit_weights_on_model_save": true
|
| 34 |
+
},
|
| 35 |
+
|
| 36 |
+
"gradient_accumulation_steps": "auto",
|
| 37 |
+
"gradient_clipping": "auto",
|
| 38 |
+
"steps_per_print": 2000,
|
| 39 |
+
"train_batch_size": "auto",
|
| 40 |
+
"train_micro_batch_size_per_gpu": "auto",
|
| 41 |
+
"wall_clock_breakdown": false
|
| 42 |
+
}
|
ms-swift/swift/llm/eval/__init__.py
ADDED
|
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) Alibaba, Inc. and its affiliates.
|
| 2 |
+
from .eval import SwiftEval, eval_main
|
ms-swift/swift/llm/eval/utils.py
ADDED
|
@@ -0,0 +1,53 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from dataclasses import asdict
|
| 2 |
+
from typing import Any, Dict, List, Union
|
| 3 |
+
|
| 4 |
+
import torch.nn as nn
|
| 5 |
+
from evalscope.models.custom import CustomModel
|
| 6 |
+
from transformers import PreTrainedModel
|
| 7 |
+
|
| 8 |
+
from ..infer import PtEngine, RequestConfig
|
| 9 |
+
from ..template import InferRequest
|
| 10 |
+
|
| 11 |
+
|
| 12 |
+
class EvalModel(CustomModel):
    """``CustomModel`` implementation that answers prompts through a ``PtEngine``."""

    def __init__(self, model: Union[PreTrainedModel, nn.Module], template, max_batch_size, model_name: str,
                 **kwargs) -> None:
        """Register ``model_name`` as the model id and build the inference engine."""
        super().__init__(config={'model_id': model_name}, **kwargs)
        self.model_name = model_name
        self.model = model
        self.template = template
        self.engine = PtEngine.from_model_template(model, template, max_batch_size=max_batch_size)

    def predict(self, prompts: List[dict], **kwargs) -> List[Dict[str, Any]]:
        """Run inference and return each response converted with ``asdict``.

        ``kwargs['origin_inputs']`` is used instead of ``prompts`` when present;
        ``kwargs['infer_cfg']`` is required and is expanded into a ``RequestConfig``.
        """
        # use origin inputs
        raw_inputs = kwargs.get('origin_inputs', prompts)
        requests = self.prepare_inputs(raw_inputs)

        request_config = RequestConfig(**kwargs['infer_cfg'].copy())

        responses = self.engine.infer(infer_requests=requests, request_config=request_config, use_tqdm=False)
        return [asdict(single) for single in responses]

    def prepare_inputs(self, prompts: Union[List[dict], List[str]]) -> List[InferRequest]:
        """Convert raw prompts (plain strings or dicts with a ``'data'`` list) into ``InferRequest`` objects."""
        requests = []
        for entry in prompts:
            if isinstance(entry, str):
                query, system_prompt = entry, None
            else:
                data: list = entry['data']
                if isinstance(data[0], tuple):  # for truthful_qa and hellaswag
                    query = '\n'.join(''.join(parts) for parts in data)
                else:
                    query = data[0]
                system_prompt = entry.get('system_prompt', None)
            # prepare messages
            messages = [{'role': 'system', 'content': system_prompt}] if system_prompt else []
            messages.append({'role': 'user', 'content': query})
            requests.append(InferRequest(messages=messages))
        return requests
|
ms-swift/swift/llm/export/__init__.py
ADDED
|
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) Alibaba, Inc. and its affiliates.
|
| 2 |
+
from .export import SwiftExport, export_main
|
| 3 |
+
from .merge_lora import merge_lora
|
| 4 |
+
from .ollama import export_to_ollama
|
| 5 |
+
from .quant import quantize_model
|
ms-swift/swift/llm/infer/__init__.py
ADDED
|
@@ -0,0 +1,35 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) Alibaba, Inc. and its affiliates.
|
| 2 |
+
from typing import TYPE_CHECKING
|
| 3 |
+
|
| 4 |
+
from swift.utils.import_utils import _LazyModule
|
| 5 |
+
|
| 6 |
+
if TYPE_CHECKING:
|
| 7 |
+
from .infer import infer_main, SwiftInfer
|
| 8 |
+
from .rollout import rollout_main
|
| 9 |
+
from .deploy import deploy_main, SwiftDeploy, run_deploy
|
| 10 |
+
from .protocol import RequestConfig, Function
|
| 11 |
+
from .utils import prepare_model_template
|
| 12 |
+
from .infer_engine import (InferEngine, VllmEngine, LmdeployEngine, PtEngine, InferClient,
|
| 13 |
+
prepare_generation_config, AdapterRequest, BaseInferEngine)
|
| 14 |
+
else:
|
| 15 |
+
_import_structure = {
|
| 16 |
+
'rollout': ['rollout_main'],
|
| 17 |
+
'infer': ['infer_main', 'SwiftInfer'],
|
| 18 |
+
'deploy': ['deploy_main', 'SwiftDeploy', 'run_deploy'],
|
| 19 |
+
'protocol': ['RequestConfig', 'Function'],
|
| 20 |
+
'utils': ['prepare_model_template'],
|
| 21 |
+
'infer_engine': [
|
| 22 |
+
'InferEngine', 'VllmEngine', 'LmdeployEngine', 'PtEngine', 'InferClient', 'prepare_generation_config',
|
| 23 |
+
'AdapterRequest', 'BaseInferEngine'
|
| 24 |
+
],
|
| 25 |
+
}
|
| 26 |
+
|
| 27 |
+
import sys
|
| 28 |
+
|
| 29 |
+
sys.modules[__name__] = _LazyModule(
|
| 30 |
+
__name__,
|
| 31 |
+
globals()['__file__'],
|
| 32 |
+
_import_structure,
|
| 33 |
+
module_spec=__spec__,
|
| 34 |
+
extra_objects={},
|
| 35 |
+
)
|
ms-swift/swift/llm/infer/infer_engine/__pycache__/utils.cpython-310.pyc
ADDED
|
Binary file (21.5 kB). View file
|
|
|
ms-swift/swift/llm/model/__pycache__/model_arch.cpython-310.pyc
ADDED
|
Binary file (9.6 kB). View file
|
|
|
ms-swift/swift/llm/model/model/__pycache__/gemma.cpython-310.pyc
ADDED
|
Binary file (4.81 kB). View file
|
|
|
ms-swift/swift/llm/model/model/__pycache__/glm.cpython-310.pyc
ADDED
|
Binary file (8.51 kB). View file
|
|
|
ms-swift/swift/llm/model/model/__pycache__/llama.cpython-310.pyc
ADDED
|
Binary file (10 kB). View file
|
|
|
ms-swift/swift/llm/model/model/__pycache__/llava.cpython-310.pyc
ADDED
|
Binary file (7.59 kB). View file
|
|
|
ms-swift/swift/llm/model/model/__pycache__/mistral.cpython-310.pyc
ADDED
|
Binary file (3.87 kB). View file
|
|
|
ms-swift/swift/llm/model/model/baichuan.py
ADDED
|
@@ -0,0 +1,147 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) Alibaba, Inc. and its affiliates.
|
| 2 |
+
from types import MethodType
|
| 3 |
+
from typing import Any, Dict
|
| 4 |
+
|
| 5 |
+
import torch.nn.functional as F
|
| 6 |
+
from torch import Tensor
|
| 7 |
+
from transformers import AutoConfig
|
| 8 |
+
|
| 9 |
+
from swift.llm import TemplateType
|
| 10 |
+
from swift.utils import get_logger
|
| 11 |
+
from ..constant import LLMModelType
|
| 12 |
+
from ..model_arch import ModelArch
|
| 13 |
+
from ..register import Model, ModelGroup, ModelMeta, get_model_tokenizer_with_flash_attn, register_model
|
| 14 |
+
from ..utils import ModelInfo
|
| 15 |
+
|
| 16 |
+
logger = get_logger()
|
| 17 |
+
|
| 18 |
+
|
| 19 |
+
def get_model_tokenizer_baichuan(model_dir: str,
                                 model_info: ModelInfo,
                                 model_kwargs: Dict[str, Any],
                                 load_model: bool = True,
                                 **kwargs):
    """Load model and tokenizer, adding ``get_input_embeddings`` when the model class lacks it.

    Returns:
        The ``(model, tokenizer)`` pair from ``get_model_tokenizer_with_flash_attn``.
    """
    model, tokenizer = get_model_tokenizer_with_flash_attn(model_dir, model_info, model_kwargs, load_model, **kwargs)
    if model is None:
        return model, tokenizer

    # baichuan-13b does not implement the `get_input_embeddings` function
    # fix gradient_checkpointing bug
    try:
        model.get_input_embeddings()
    except NotImplementedError:
        model.__class__.get_input_embeddings = lambda self: self.model.embed_tokens
    return model, tokenizer
|
| 33 |
+
|
| 34 |
+
|
| 35 |
+
# Register the Baichuan 7B/13B checkpoints with their dedicated loader.
register_model(
    ModelMeta(
        LLMModelType.baichuan,
        [
            ModelGroup([
                Model('baichuan-inc/Baichuan-13B-Chat', 'baichuan-inc/Baichuan-13B-Chat'),
                Model('baichuan-inc/Baichuan-13B-Base', 'baichuan-inc/Baichuan-13B-Base'),
                Model('baichuan-inc/baichuan-7B', 'baichuan-inc/Baichuan-7B'),
            ]),
        ],
        TemplateType.baichuan,
        get_model_tokenizer_baichuan,
        architectures=['BaichuanForCausalLM', 'BaiChuanForCausalLM'],
        model_arch=ModelArch.baichuan,
        requires=['transformers<4.34'],
    ))
|
| 49 |
+
|
| 50 |
+
|
| 51 |
+
def get_model_tokenizer_baichuan_m1(model_dir: str,
                                    model_info: ModelInfo,
                                    model_kwargs: Dict[str, Any],
                                    load_model: bool = True,
                                    **kwargs):
    """Load Baichuan-M1 after patching its rotary embedding to cast ``q`` to ``k``'s dtype.

    The patch is applied at most once per ``RotaryEmbedding`` class, so loading
    the model repeatedly does not stack wrappers around ``forward``.

    Returns:
        The ``(model, tokenizer)`` pair from ``get_model_tokenizer_baichuan``.
    """
    from transformers.dynamic_module_utils import get_class_from_dynamic_module
    rotary_embedding = get_class_from_dynamic_module('modeling_baichuan.RotaryEmbedding', model_dir)

    if not getattr(rotary_embedding.forward, '_swift_dtype_patched', False):
        _old_forward = rotary_embedding.forward

        def _new_forward(self, q, k, seqlen_offset=None, cu_seqlens=None, max_seqlen=None):
            # Align q with k's dtype before delegating to the original forward.
            q = q.to(k.dtype)
            return _old_forward(self, q, k, seqlen_offset, cu_seqlens, max_seqlen)

        # Marker checked above to keep the patch idempotent.
        _new_forward._swift_dtype_patched = True
        rotary_embedding.forward = _new_forward

    model, tokenizer = get_model_tokenizer_baichuan(model_dir, model_info, model_kwargs, load_model, **kwargs)
    return model, tokenizer
|
| 69 |
+
|
| 70 |
+
|
| 71 |
+
# Register Baichuan-M1-14B-Instruct with the loader that patches its rotary embedding.
register_model(
    ModelMeta(
        LLMModelType.baichuan_m1,
        [
            ModelGroup([
                Model('baichuan-inc/Baichuan-M1-14B-Instruct', 'baichuan-inc/Baichuan-M1-14B-Instruct'),
            ]),
        ],
        TemplateType.baichuan_m1,
        get_model_tokenizer_baichuan_m1,
        architectures=['BaichuanM1ForCausalLM'],
        model_arch=ModelArch.baichuan,
        requires=['transformers>=4.48'],
    ))
|
| 83 |
+
|
| 84 |
+
|
| 85 |
+
def patch_baichuan2_lm_head_forward(self, hidden_states: Tensor) -> Tensor:
    """Project ``hidden_states`` with the normalized ``lm_head`` weight.

    While training, the weight is normalized on every call and the stored
    weight is left untouched. Outside training, the stored weight is
    normalized in place on the first call (tracked by ``self.first_flag``)
    and reused as-is afterwards.
    """
    # patch: baichuan2 lm_head (fp32 bug)
    if self.training:
        normalized = F.normalize(self.weight).to(self.weight.dtype)
        return F.linear(hidden_states, normalized)

    if self.first_flag:
        self.first_flag = False
        self.weight.data = F.normalize(self.weight).to(self.weight.dtype)
    return F.linear(hidden_states, self.weight)
|
| 96 |
+
|
| 97 |
+
|
| 98 |
+
def get_model_tokenizer_baichuan2(model_dir: str,
                                  model_info: ModelInfo,
                                  model_kwargs: Dict[str, Any],
                                  load_model: bool = True,
                                  model_config=None,
                                  **kwargs):
    """Load Baichuan2 with a sanitized config and a patched ``lm_head`` forward.

    Returns:
        ``(model, tokenizer)``; ``model`` is the object returned by the loader,
        even when the patch was applied to its inner ``.model``.
    """
    if model_config is None:
        model_config = AutoConfig.from_pretrained(model_dir, trust_remote_code=True)
    if not hasattr(model_config, 'z_loss_weight'):
        model_config.z_loss_weight = 0
    # patch: baichuan2_13b configuration_baichuan.py bug
    if hasattr(model_config, 'gradient_checkpointing'):
        checkpointing_value = model_config.gradient_checkpointing
        if isinstance(checkpointing_value, (tuple, list)):
            model_config.gradient_checkpointing = checkpointing_value[0]

    loaded_model, tokenizer = get_model_tokenizer_with_flash_attn(
        model_dir, model_info, model_kwargs, load_model, model_config=model_config, **kwargs)
    if loaded_model is None:
        return loaded_model, tokenizer

    # fix awq: when there is no top-level lm_head, use the inner model instead
    target = loaded_model if hasattr(loaded_model, 'lm_head') else loaded_model.model
    bound_forward = MethodType(patch_baichuan2_lm_head_forward, target.lm_head)
    # NOTE(review): the check is on `target` while the assignment is on
    # `target.lm_head` -- confirm this is the intended object to test.
    if hasattr(target, '_old_forward'):  # device_map
        target.lm_head._old_forward = bound_forward
    else:
        target.lm_head.forward = bound_forward
    return loaded_model, tokenizer
|
| 125 |
+
|
| 126 |
+
|
| 127 |
+
# Register the Baichuan2 checkpoints; the 4-bit group carries its own requirements.
register_model(
    ModelMeta(
        LLMModelType.baichuan2,
        [
            ModelGroup([
                Model('baichuan-inc/Baichuan2-7B-Chat', 'baichuan-inc/Baichuan2-7B-Chat'),
                Model('baichuan-inc/Baichuan2-7B-Base', 'baichuan-inc/Baichuan2-7B-Base'),
                Model('baichuan-inc/Baichuan2-13B-Chat', 'baichuan-inc/Baichuan2-13B-Chat'),
                Model('baichuan-inc/Baichuan2-13B-Base', 'baichuan-inc/Baichuan2-13B-Base'),
            ]),
            ModelGroup(
                [
                    Model('baichuan-inc/Baichuan2-7B-Chat-4bits', 'baichuan-inc/Baichuan2-7B-Chat-4bits'),
                    Model('baichuan-inc/Baichuan2-13B-Chat-4bits', 'baichuan-inc/Baichuan2-13B-Chat-4bits'),
                ],
                requires=['bitsandbytes<0.41.2', 'accelerate<0.26'],
            ),
        ],
        TemplateType.baichuan,
        get_model_tokenizer_baichuan2,
        architectures=['BaichuanForCausalLM', 'BaiChuanForCausalLM'],
        model_arch=ModelArch.baichuan,
    ))
|
ms-swift/swift/llm/model/model/mamba.py
ADDED
|
@@ -0,0 +1,41 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) Alibaba, Inc. and its affiliates.
|
| 2 |
+
from typing import Any, Dict
|
| 3 |
+
|
| 4 |
+
from swift.llm import TemplateType
|
| 5 |
+
from swift.utils import get_logger
|
| 6 |
+
from ..constant import LLMModelType
|
| 7 |
+
from ..register import Model, ModelGroup, ModelMeta, get_model_tokenizer_from_local, register_model
|
| 8 |
+
from ..utils import ModelInfo
|
| 9 |
+
|
| 10 |
+
logger = get_logger()
|
| 11 |
+
|
| 12 |
+
|
| 13 |
+
def get_model_tokenizer_mamba(model_dir: str,
                              model_info: ModelInfo,
                              model_kwargs: Dict[str, Any],
                              load_model: bool = True,
                              **kwargs):
    """Log a reminder about the optional fast kernels, then load via ``get_model_tokenizer_from_local``.

    Returns:
        Whatever ``get_model_tokenizer_from_local`` returns for these arguments.
    """
    # The two literals are concatenated implicitly, so the first one must end
    # with a space to keep "will be" as two words.
    logger.info('[IMPORTANT] Remember to install causal-conv1d>=1.2.0 and mamba-ssm, or your training and inference '
                'will be really slow!')
    return get_model_tokenizer_from_local(model_dir, model_info, model_kwargs, load_model, **kwargs)
|
| 21 |
+
|
| 22 |
+
|
| 23 |
+
# Register the Mamba checkpoints with the default template.
register_model(
    ModelMeta(
        LLMModelType.mamba,
        [
            ModelGroup([
                Model('AI-ModelScope/mamba-130m-hf', 'state-spaces/mamba-130m-hf'),
                Model('AI-ModelScope/mamba-370m-hf', 'state-spaces/mamba-370m-hf'),
                Model('AI-ModelScope/mamba-390m-hf', 'state-spaces/mamba-390m-hf'),
                Model('AI-ModelScope/mamba-790m-hf', 'state-spaces/mamba-790m-hf'),
                Model('AI-ModelScope/mamba-1.4b-hf', 'state-spaces/mamba-1.4b-hf'),
                Model('AI-ModelScope/mamba-2.8b-hf', 'state-spaces/mamba-2.8b-hf'),
            ]),
        ],
        TemplateType.default,
        get_model_tokenizer_mamba,
        architectures=['MambaForCausalLM'],
        model_arch=None,
        requires=['transformers>=4.39.0'],
    ))
|
ms-swift/swift/llm/model/model/stepfun.py
ADDED
|
@@ -0,0 +1,86 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) Alibaba, Inc. and its affiliates.
|
| 2 |
+
import os
|
| 3 |
+
import shutil
|
| 4 |
+
import sys
|
| 5 |
+
|
| 6 |
+
from transformers import AutoModel
|
| 7 |
+
|
| 8 |
+
from swift.llm import TemplateType
|
| 9 |
+
from ..constant import MLLMModelType
|
| 10 |
+
from ..model_arch import ModelArch
|
| 11 |
+
from ..register import (Model, ModelGroup, ModelMeta, get_model_tokenizer_multimodal,
|
| 12 |
+
get_model_tokenizer_with_flash_attn, register_model)
|
| 13 |
+
from ..utils import git_clone_github, safe_snapshot_download
|
| 14 |
+
|
| 15 |
+
|
| 16 |
+
def get_model_tokenizer_got_ocr2(*args, **kwargs):
    """Load GOT-OCR2 through ``get_model_tokenizer_with_flash_attn`` with ``AutoModel`` as the model class."""
    loader_kwargs = {**kwargs, 'automodel_class': AutoModel}
    model, tokenizer = get_model_tokenizer_with_flash_attn(*args, **loader_kwargs)
    return model, tokenizer
|
| 20 |
+
|
| 21 |
+
|
| 22 |
+
# Register the original GOT-OCR2_0 checkpoint.
register_model(
    ModelMeta(
        MLLMModelType.got_ocr2,
        [
            ModelGroup([
                Model('stepfun-ai/GOT-OCR2_0', 'stepfun-ai/GOT-OCR2_0'),
            ]),
        ],
        TemplateType.got_ocr2,
        get_model_tokenizer_got_ocr2,
        model_arch=ModelArch.got_ocr2,
        architectures=['GOTQwenForCausalLM'],
        tags=['vision'],
    ))
|
| 34 |
+
|
| 35 |
+
|
| 36 |
+
def get_model_tokenizer_got_ocr2_hf(model_dir, *args, **kwargs):
    """Load the transformers port of GOT-OCR2, keeping ``GotOcr2VisionLayer`` unsplit.

    Returns:
        The ``(model, processor)`` pair from ``get_model_tokenizer_multimodal``.
    """
    from transformers.models.got_ocr2 import GotOcr2ForConditionalGeneration
    no_split_modules = GotOcr2ForConditionalGeneration._no_split_modules
    # `_no_split_modules` lives on the class, so only append once; otherwise
    # every call to this loader would add another duplicate entry.
    if 'GotOcr2VisionLayer' not in no_split_modules:
        no_split_modules.append('GotOcr2VisionLayer')
    model, processor = get_model_tokenizer_multimodal(model_dir, *args, **kwargs)
    return model, processor
|
| 41 |
+
|
| 42 |
+
|
| 43 |
+
# Register the transformers-native GOT-OCR-2.0-hf checkpoint.
# NOTE(review): `architectures` is the same string used by the got_ocr2
# registration -- confirm it matches this checkpoint's config.
register_model(
    ModelMeta(
        MLLMModelType.got_ocr2_hf,
        [
            ModelGroup([
                Model('stepfun-ai/GOT-OCR-2.0-hf', 'stepfun-ai/GOT-OCR-2.0-hf'),
            ]),
        ],
        TemplateType.got_ocr2_hf,
        get_model_tokenizer_got_ocr2_hf,
        model_arch=ModelArch.got_ocr2_hf,
        architectures=['GOTQwenForCausalLM'],
        tags=['vision'],
    ))
|
| 55 |
+
|
| 56 |
+
|
| 57 |
+
def get_model_tokenizer_step_audio(*args, **kwargs):
    """Load Step-Audio and attach its audio tokenizer as ``model.encoder``.

    The Step-Audio repository (``kwargs['local_repo_path']`` or a fresh clone)
    is put on ``sys.path`` so that its ``tokenizer`` module can be imported.

    Returns:
        The ``(model, tokenizer)`` pair from ``get_model_tokenizer_with_flash_attn``.
    """
    local_repo_path = kwargs.get('local_repo_path')
    if not local_repo_path:
        local_repo_path = git_clone_github('https://github.com/stepfun-ai/Step-Audio.git')
    # Avoid piling up duplicate entries when the loader runs more than once.
    if local_repo_path not in sys.path:
        sys.path.append(local_repo_path)
    from tokenizer import StepAudioTokenizer
    encoder_path = safe_snapshot_download('stepfun-ai/Step-Audio-Tokenizer', check_local=True)
    model, tokenizer = get_model_tokenizer_with_flash_attn(*args, **kwargs)
    if model is not None:
        model.encoder = StepAudioTokenizer(encoder_path)
        # from tts import StepAudioTTS
        # if not os.path.exists('speakers'):
        #     shutil.copytree(os.path.join(local_repo_path, 'speakers'), 'speakers')
        # decoder_path = safe_snapshot_download('stepfun-ai/Step-Audio-TTS-3B', check_local=True)
        # model.decoder = StepAudioTTS(decoder_path, model.encoder)
    return model, tokenizer
|
| 73 |
+
|
| 74 |
+
|
| 75 |
+
# Register Step-Audio-Chat together with the extra packages it requires.
register_model(
    ModelMeta(
        MLLMModelType.step_audio,
        [
            ModelGroup([
                Model('stepfun-ai/Step-Audio-Chat', 'stepfun-ai/Step-Audio-Chat'),
            ]),
        ],
        TemplateType.step_audio,
        get_model_tokenizer_step_audio,
        architectures=['Step1ForCausalLM'],
        requires=['funasr', 'sox', 'conformer', 'openai-whisper', 'librosa'],
        tags=['audio'],
    ))
|
ms-swift/swift/llm/model/model/telechat.py
ADDED
|
@@ -0,0 +1,59 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) Alibaba, Inc. and its affiliates.
|
| 2 |
+
|
| 3 |
+
from transformers import GenerationConfig
|
| 4 |
+
|
| 5 |
+
from swift.llm import TemplateType
|
| 6 |
+
from ..constant import LLMModelType
|
| 7 |
+
from ..model_arch import ModelArch
|
| 8 |
+
from ..register import Model, ModelGroup, ModelMeta, get_model_tokenizer_with_flash_attn, register_model
|
| 9 |
+
|
| 10 |
+
|
| 11 |
+
def get_model_tokenizer_telechat(*args, **kwargs):
    """Load TeleChat and copy special token ids from its generation config onto the tokenizer.

    ``model_dir`` is taken from the first positional argument, or from the
    ``model_dir`` keyword when no positional arguments are given.

    Returns:
        The ``(model, tokenizer)`` pair from ``get_model_tokenizer_with_flash_attn``.
    """
    model, tokenizer = get_model_tokenizer_with_flash_attn(*args, **kwargs)
    # Accept keyword-style calls too instead of failing on args[0].
    model_dir = args[0] if args else kwargs['model_dir']
    generation_config = GenerationConfig.from_pretrained(model_dir)
    for k in ['bos_token_id', 'eos_token_id', 'pad_token_id', 'user_token_id', 'bot_token_id']:
        setattr(tokenizer, k, getattr(generation_config, k))
    return model, tokenizer
|
| 18 |
+
|
| 19 |
+
|
| 20 |
+
# Register the TeleChat checkpoints that use the `telechat` template.
register_model(
    ModelMeta(
        LLMModelType.telechat,
        [
            ModelGroup([
                Model('TeleAI/TeleChat-7B', 'Tele-AI/telechat-7B'),
                Model('TeleAI/TeleChat-12B', 'Tele-AI/TeleChat-12B'),
                Model('TeleAI/TeleChat-12B-v2', 'Tele-AI/TeleChat-12B-v2'),
                Model('TeleAI/TeleChat-52B', 'TeleAI/TeleChat-52B'),
            ]),
            ModelGroup([
                Model('swift/TeleChat-12B-V2-GPTQ-Int4'),
            ]),
            ModelGroup([
                Model('TeleAI/TeleChat2-35B', 'Tele-AI/TeleChat2-35B'),
                Model('TeleAI/TeleChat2-115B', 'Tele-AI/TeleChat2-115B'),
            ]),
        ],
        TemplateType.telechat,
        get_model_tokenizer_telechat,
        model_arch=ModelArch.telechat,
        architectures=['TelechatForCausalLM', 'TeleChatForCausalLM'],
    ))
|
| 43 |
+
|
| 44 |
+
# Register the TeleChat2 checkpoints that use the `telechat2` template.
register_model(
    ModelMeta(
        LLMModelType.telechat2,
        [
            ModelGroup([
                Model('TeleAI/TeleChat2-3B', 'Tele-AI/TeleChat2-3B'),
                Model('TeleAI/TeleChat2-7B-32K', 'Tele-AI/TeleChat2-7B-32K'),
                Model('TeleAI/TeleChat2-35B-32K', 'Tele-AI/TeleChat2-35B-32K'),
                Model('TeleAI/TeleChat2-35B-Nov', 'Tele-AI/TeleChat2-35B-Nov'),
            ]),
        ],
        TemplateType.telechat2,
        get_model_tokenizer_telechat,
        model_arch=ModelArch.telechat,
        architectures=['TeleChat2ForCausalLM'],
    ))
|
ms-swift/swift/llm/model/model/valley.py
ADDED
|
@@ -0,0 +1,82 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) Alibaba, Inc. and its affiliates.
|
| 2 |
+
import os
|
| 3 |
+
import sys
|
| 4 |
+
from functools import partial, wraps
|
| 5 |
+
from typing import Any, Dict
|
| 6 |
+
|
| 7 |
+
from swift.llm import TemplateType
|
| 8 |
+
from ..constant import MLLMModelType
|
| 9 |
+
from ..model_arch import ModelArch
|
| 10 |
+
from ..register import Model, ModelGroup, ModelMeta, get_model_tokenizer_with_flash_attn, register_model
|
| 11 |
+
from ..utils import ModelInfo, git_clone_github, safe_snapshot_download
|
| 12 |
+
|
| 13 |
+
|
| 14 |
+
def get_model_tokenizer_valley(model_dir: str,
                               model_info: ModelInfo,
                               model_kwargs: Dict[str, Any],
                               load_model: bool = True,
                               **kwargs):
    """Load Valley and attach its image processors to the tokenizer.

    ``kwargs`` must contain ``llm_model_type`` (it is popped). The Valley
    repository (``kwargs['local_repo_path']`` or a fresh clone) is put on
    ``sys.path`` so that ``valley_eagle`` can be imported. For the ``'valley'``
    type, ``ValleyQwen2ForCausalLM.forward`` is wrapped once so that a
    non-scalar loss is averaged over its last dimension.

    Returns:
        The ``(model, tokenizer)`` pair from ``get_model_tokenizer_with_flash_attn``.
    """
    llm_model_type = kwargs.pop('llm_model_type')
    local_repo_path = kwargs.get('local_repo_path')
    if not local_repo_path:
        repo_path = 'https://github.com/bytedance/Valley.git'
        local_repo_path = git_clone_github(repo_path)
    # Avoid piling up duplicate entries when the loader runs more than once.
    if local_repo_path not in sys.path:
        sys.path.append(local_repo_path)

    if llm_model_type == 'valley':
        from transformers.modeling_outputs import CausalLMOutputWithPast
        from valley_eagle.model.language_model.valley_qwen2 import ValleyQwen2ForCausalLM, ValleyConfig
        model_config = ValleyConfig.from_pretrained(model_dir)
        model_config.mm_vision_tower = safe_snapshot_download(
            'AI-ModelScope/siglip-so400m-patch14-384', check_local=True)
        model_config.eagle_vision_tower = safe_snapshot_download('Qwen/Qwen2-VL-7B-Instruct', check_local=True)
        automodel_class = ValleyQwen2ForCausalLM

        # `_origin_forward` marks the class as already patched.
        if not hasattr(ValleyQwen2ForCausalLM, '_origin_forward'):
            forward = ValleyQwen2ForCausalLM.forward
            ValleyQwen2ForCausalLM._origin_forward = forward

            @wraps(forward)
            def new_forward(*args, **kwargs):
                import torch
                outputs = forward(*args, **kwargs)
                loss = outputs.loss
                # Only reduce a loss that has a last dimension; a 0-dim loss
                # is passed through instead of failing on shape[-1].
                if loss is not None and len(loss.shape) > 0 and loss.shape[-1] > 0:
                    loss = torch.mean(loss, dim=-1)
                return CausalLMOutputWithPast(
                    loss=loss,
                    logits=outputs.logits,
                    past_key_values=outputs.past_key_values,
                    hidden_states=outputs.hidden_states,
                    attentions=outputs.attentions,
                )

            ValleyQwen2ForCausalLM.forward = new_forward

    kwargs['model_config'] = model_config
    kwargs['automodel_class'] = automodel_class
    model, tokenizer = get_model_tokenizer_with_flash_attn(model_dir, model_info, model_kwargs, load_model, **kwargs)
    if model is not None:
        model.generation_config.repetition_penalty = 1.0  # Otherwise, Error. Same for original code.
        from transformers import AutoProcessor, SiglipImageProcessor
        tokenizer.image_processor = SiglipImageProcessor.from_pretrained(model.config.mm_vision_tower)
        tokenizer.qwen2vl_processor = AutoProcessor.from_pretrained(
            model.config.eagle_vision_tower, max_pixels=1280 * 28 * 28)
        tokenizer.image_processor.crop_size = tokenizer.image_processor.size['height']
    return model, tokenizer
|
| 66 |
+
|
| 67 |
+
|
| 68 |
+
# Register Valley-Eagle-7B; the loader is pre-bound to llm_model_type='valley'.
register_model(
    ModelMeta(
        MLLMModelType.valley,
        [
            ModelGroup([
                Model('bytedance-research/Valley-Eagle-7B'),
            ], ),
        ],
        TemplateType.valley,
        partial(get_model_tokenizer_valley, llm_model_type='valley'),
        architectures=['ValleyQwen2ForCausalLM'],
        model_arch=ModelArch.valley,
        requires=['transformers>=4.42', 'av'],
        tags=['vision'],
    ))
|
ms-swift/swift/llm/model/patcher.py
ADDED
|
@@ -0,0 +1,363 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) Alibaba, Inc. and its affiliates.
|
| 2 |
+
import os
|
| 3 |
+
from contextlib import contextmanager
|
| 4 |
+
from functools import wraps
|
| 5 |
+
from types import MethodType
|
| 6 |
+
from typing import Dict, List, Optional, Union
|
| 7 |
+
|
| 8 |
+
import accelerate
|
| 9 |
+
import torch
|
| 10 |
+
import torch.nn as nn
|
| 11 |
+
import transformers
|
| 12 |
+
from accelerate.utils import find_device
|
| 13 |
+
from packaging import version
|
| 14 |
+
from torch.nn import BCEWithLogitsLoss, CrossEntropyLoss, MSELoss
|
| 15 |
+
from torch.nn.parallel import DistributedDataParallel as DDP
|
| 16 |
+
from transformers import PreTrainedModel, dynamic_module_utils, trainer
|
| 17 |
+
from transformers.modeling_outputs import SequenceClassifierOutputWithPast
|
| 18 |
+
|
| 19 |
+
from swift.llm import to_device, to_float_dtype
|
| 20 |
+
from swift.utils import get_dist_setting, get_logger, is_mp_ddp, safe_ddp_context, use_torchacc
|
| 21 |
+
from swift.utils.torch_utils import _get_max_memory, _sync_max_memory, get_device_count
|
| 22 |
+
from .model_arch import get_model_arch
|
| 23 |
+
from .utils import HfConfigFactory
|
| 24 |
+
|
| 25 |
+
logger = get_logger()
|
| 26 |
+
|
| 27 |
+
|
| 28 |
+
def patch_fixed_float_dtype(module: torch.nn.Module, dtype):
    """Register a forward hook that passes the module's output through ``to_float_dtype``.

    Args:
        module: The module whose forward output should be converted.
        dtype: The dtype handed to ``to_float_dtype`` together with the output.
    """

    def _cast_output(_module, _inputs, result):
        # A non-None return value from a forward hook replaces the module's output.
        return to_float_dtype(result, dtype)

    module.register_forward_hook(_cast_output)
|
| 39 |
+
|
| 40 |
+
|
| 41 |
+
def patch_fixed_device(module: torch.nn.Module, device):
    """Register a forward hook that passes the module's output through ``to_device``.

    Args:
        module: The module whose forward output should be moved.
        device: The device handed to ``to_device`` together with the output.
    """

    def _move_output(_module, _inputs, result):
        # A non-None return value from a forward hook replaces the module's output.
        return to_device(result, device)

    module.register_forward_hook(_move_output)
|
| 52 |
+
|
| 53 |
+
|
| 54 |
+
def patch_output_clone(module: torch.nn.Module):
    """Register a forward hook that replaces the module's output with a clone of it.

    The hook first flags the produced tensor as requiring grad (in place) and then
    returns a clone, so callers receive a separate tensor from the one the module
    produced.

    Args:
        module: The module whose forward output should be cloned.
    """

    def _grad_enabled_copy(_module, _inputs, result):
        result.requires_grad_(True)
        return result.clone()

    module.register_forward_hook(_grad_enabled_copy)
|
| 61 |
+
|
| 62 |
+
|
| 63 |
+
def patch_output_normalizer(module: torch.nn.Module, model_meta):
    """Patch a causal LM in place so its forward returns one L2-normalized vector per sample.

    Two things are replaced:

    * the forward of the first output head found among ``lm_head``, ``output``,
      ``embed_out`` and ``output_layer`` becomes a pass-through, so whatever is fed to
      the head is what ends up in ``outputs.logits``;
    * the model's ``forward`` is wrapped (the previous one is kept as ``forward_origin``)
      and returns ``{'last_hidden_state': ...}``.

    Args:
        module: The model to patch.
        model_meta: Its ``model_arch`` is looked up with ``get_model_arch``; the first
            ``language_model`` prefix, if any, selects the sub-model to patch.

    Raises:
        AssertionError: If none of the known head attributes exists on the target model.
    """
    head_candidates = ('lm_head', 'output', 'embed_out', 'output_layer')

    def _passthrough_head(self, hidden_states):
        return hidden_states

    def _normalized_forward(self, input_ids: torch.LongTensor = None, attention_mask=None, *args, **kwargs):
        outputs = self.forward_origin(input_ids=input_ids, attention_mask=attention_mask, *args, **kwargs)
        hidden_states = outputs.logits
        # When every row's final mask entry is set, the last position is used for all rows.
        if attention_mask[:, -1].sum() == attention_mask.shape[0]:
            pooled = hidden_states[:, -1]
        else:
            # Otherwise pick, per row, the position given by the number of set mask entries minus one.
            last_positions = attention_mask.sum(dim=1) - 1
            row_ids = torch.arange(hidden_states.shape[0], device=hidden_states.device)
            pooled = hidden_states[row_ids, last_positions]
        pooled = torch.nn.functional.normalize(pooled, p=2, dim=1)
        return {'last_hidden_state': pooled.contiguous()}

    prefixes = getattr(get_model_arch(model_meta.model_arch), 'language_model', None)
    target = getattr(module, prefixes[0]) if prefixes else module
    # Fall back to the outer module when the selected sub-model is not a CausalLM class.
    if 'CausalLM' not in target.__class__.__name__:
        target = module

    head_name = next((name for name in head_candidates if hasattr(target, name)), None)
    if head_name is not None:
        head = getattr(target, head_name)
        head.forward = MethodType(_passthrough_head, head)
    assert head_name is not None, 'Cannot find the proper lm_head name'

    target.forward_origin = target.forward
    target.forward = MethodType(_normalized_forward, target)
|
| 106 |
+
|
| 107 |
+
|
| 108 |
+
def patch_output_to_input_device(module: torch.nn.Module):
    """Register a forward hook that moves the module's output to the device of its inputs.

    The device is looked up with ``find_device`` in the positional arguments first and,
    if that yields nothing, in the keyword arguments.

    Args:
        module: The module to be patched.
    """

    def _follow_input_device(_module, args, kwargs, result):
        target = find_device(args)
        if not target:
            target = find_device(kwargs)
        return to_device(result, target)

    # with_kwargs=True makes the hook receive the keyword arguments of the forward call as well.
    module.register_forward_hook(_follow_input_device, with_kwargs=True)
|
| 120 |
+
|
| 121 |
+
|
| 122 |
+
@contextmanager
def patch_device_map():
    """Temporarily wrap ``PreTrainedModel._get_no_split_modules``.

    While the context is active, every ``PreTrainedModel`` sub-module whose
    ``_no_split_modules`` is ``None`` gets an empty list assigned on its class before
    the original method is called. The original method is restored on exit, including
    when the body raises; the class attributes set meanwhile are not reverted.
    """
    original = PreTrainedModel._get_no_split_modules

    def _patched(self, device_map: str):
        for sub in self.modules():
            if not isinstance(sub, PreTrainedModel):
                continue
            if sub._no_split_modules is None:
                sub.__class__._no_split_modules = []
        return original(self, device_map)

    PreTrainedModel._get_no_split_modules = _patched
    try:
        yield
    finally:
        PreTrainedModel._get_no_split_modules = original
|
| 137 |
+
|
| 138 |
+
|
| 139 |
+
@contextmanager
def patch_ignore_check_imports():
    """Temporarily replace ``transformers.dynamic_module_utils.check_imports``.

    Inside the context, ``check_imports`` only returns the result of
    ``get_relative_imports`` for the given file. The previous function is restored on
    exit, including when the body raises.
    """
    import transformers.dynamic_module_utils as td

    saved_check_imports = td.check_imports

    def _relative_imports_only(filename) -> List[str]:
        return td.get_relative_imports(filename)

    td.check_imports = _relative_imports_only
    try:
        yield
    finally:
        td.check_imports = saved_check_imports
|
| 152 |
+
|
| 153 |
+
|
| 154 |
+
def _patch_sequence_classification(model, model_meta):
    """Convert a loaded model into a sequence classifier in place.

    On the target model (the first ``language_model`` prefix of the architecture when
    it is a CausalLM class, otherwise ``model`` itself) this:

    * sets ``num_labels`` from ``model.config.num_labels``;
    * adds a bias-free ``score`` linear layer (hidden_size -> num_labels) with
      normally-initialized weights;
    * replaces the first output head found among ``lm_head``, ``output``, ``embed_out``
      and ``output_layer`` with ``nn.Identity``;
    * wraps ``forward`` so it pools one position per sample, optionally computes a loss
      and returns a ``SequenceClassifierOutputWithPast`` (or a tuple when
      ``return_dict`` is false).

    Args:
        model: The model instance to patch.
        model_meta: Its ``model_arch`` is looked up with ``get_model_arch``.
    """
    config = model.config
    hidden_size = HfConfigFactory.get_config_attr(config, 'hidden_size')
    initializer_range = HfConfigFactory.get_config_attr(config, 'initializer_range')

    prefixes = getattr(get_model_arch(model_meta.model_arch), 'language_model', None)
    target = getattr(model, prefixes[0]) if prefixes else model
    if 'CausalLM' not in target.__class__.__name__:  # fix qwen2_vl
        target = model

    target.num_labels = config.num_labels
    target.score = nn.Linear(hidden_size, target.num_labels, bias=False, dtype=target.dtype)
    # A layer created on the meta device has no storage; materialize it on CPU before initializing.
    if target.score.weight.device == torch.device('meta'):
        target.score.to_empty(device='cpu')
    target.score.weight.data.normal_(mean=0.0, std=initializer_range)

    head_name = next(
        (name for name in ('lm_head', 'output', 'embed_out', 'output_layer') if hasattr(target, name)), None)
    if head_name is not None:
        setattr(target, head_name, nn.Identity())

    origin_forward = target.forward.__func__

    @wraps(origin_forward)
    def new_forward(self, *args, **kwargs):
        # labels / return_dict are consumed here and never forwarded to the wrapped forward.
        labels = kwargs.pop('labels', None)
        return_dict = kwargs.pop('return_dict', None)
        if return_dict is None:
            return_dict = self.config.use_return_dict
        # Only keyword-passed inputs are inspected for batch size and padding.
        input_ids = kwargs.get('input_ids')
        inputs_embeds = kwargs.get('inputs_embeds')

        output = origin_forward(self, *args, **kwargs)
        output.logits = output.logits.to(self.score.weight.dtype)
        logits = self.score(output.logits)

        batch_source = input_ids if input_ids is not None else inputs_embeds
        batch_size = batch_source.shape[0]

        pad_token_id = self.config.pad_token_id
        if pad_token_id is None and batch_size != 1:
            raise ValueError('Cannot handle batch sizes > 1 if no padding token is defined.')
        if pad_token_id is None or input_ids is None:
            sequence_lengths = -1
        else:
            # if no pad token found, use modulo instead of reverse indexing for ONNX compatibility
            first_pad = torch.eq(input_ids, pad_token_id).int().argmax(-1)
            sequence_lengths = (first_pad - 1) % input_ids.shape[-1]
            sequence_lengths = sequence_lengths.to(logits.device)

        row_ids = torch.arange(batch_size, device=logits.device)
        pooled_logits = logits[row_ids, sequence_lengths]

        loss = None
        if labels is not None:
            labels = labels.to(logits.device)
            problem_type = self.config.problem_type
            if problem_type is None:
                # Infer the problem type once and remember it on the config.
                if self.num_labels == 1:
                    problem_type = 'regression'
                elif self.num_labels > 1 and labels.dtype in (torch.long, torch.int):
                    problem_type = 'single_label_classification'
                else:
                    problem_type = 'multi_label_classification'
                self.config.problem_type = problem_type

            if problem_type == 'regression':
                criterion = MSELoss()
                if self.num_labels == 1:
                    loss = criterion(pooled_logits.squeeze(), labels.squeeze())
                else:
                    loss = criterion(pooled_logits, labels)
            elif problem_type == 'single_label_classification':
                criterion = CrossEntropyLoss()
                loss = criterion(pooled_logits.view(-1, self.num_labels), labels.view(-1))
            elif problem_type == 'multi_label_classification':
                criterion = BCEWithLogitsLoss()
                loss = criterion(pooled_logits, labels)

        if return_dict:
            return SequenceClassifierOutputWithPast(
                loss=loss,
                logits=pooled_logits,
                past_key_values=output.past_key_values,
                hidden_states=output.hidden_states,
                attentions=output.attentions,
            )

        output = (pooled_logits, ) + output[1:]
        if loss is not None:
            return (loss, ) + output
        return output

    target.forward = MethodType(new_forward, target)
|
| 245 |
+
|
| 246 |
+
|
| 247 |
+
@contextmanager
def patch_automodel_for_sequence_classification(model_meta):
    """Temporarily make ``PreTrainedModel.from_pretrained`` build sequence classifiers.

    While the context is active, ``from_pretrained`` wraps the ``__init__`` of the class
    being loaded so that ``_patch_sequence_classification`` runs right after the
    original constructor. The class ``__init__`` is put back once loading finishes, and
    ``PreTrainedModel.from_pretrained`` is put back when the context exits.

    Args:
        model_meta: Forwarded to ``_patch_sequence_classification``.
    """
    from_pretrained = PreTrainedModel.from_pretrained.__func__

    @classmethod
    def _new_from_pretrained(cls, *args, **kwargs):
        original_init = cls.__init__

        def _init_then_patch(self, *init_args, **init_kwargs):
            original_init(self, *init_args, **init_kwargs)
            _patch_sequence_classification(self, model_meta)

        cls.__init__ = _init_then_patch
        try:
            if hasattr(cls, '_tp_plan'):  # fix tp_plan
                cls._tp_plan = cls._tp_plan or {}
            return from_pretrained(cls, *args, **kwargs)
        finally:
            # Restore even when loading fails; otherwise the class would stay patched
            # and a later load would wrap the already-wrapped constructor again.
            cls.__init__ = original_init

    PreTrainedModel.from_pretrained = _new_from_pretrained

    try:
        yield
    finally:
        PreTrainedModel.from_pretrained = classmethod(from_pretrained)
|
| 272 |
+
|
| 273 |
+
|
| 274 |
+
@contextmanager
def patch_automodel(automodel_class, model_info):
    """Temporarily wrap ``PreTrainedModel.from_pretrained`` with loading work-arounds.

    Inside the context the wrapped ``from_pretrained``:

    * drops the ``use_cache`` keyword when ``automodel_class`` is an ``AutoAWQFor*`` class;
    * sets ``cls.main_input_name`` to ``'input_ids'`` when ``model_info.quant_method``
      is ``'gptq'``;
    * replaces a falsy ``cls._tp_plan`` with an empty dict.

    The original classmethod is restored on exit, including when the body raises.

    Args:
        automodel_class: The class used for loading; only its ``__name__`` is inspected.
        model_info: Object whose ``quant_method`` attribute is read.
    """
    original = PreTrainedModel.from_pretrained.__func__

    def _from_pretrained(cls, *args, **kwargs):
        if 'AutoAWQFor' in automodel_class.__name__:
            kwargs.pop('use_cache', None)
        if model_info.quant_method == 'gptq':
            cls.main_input_name = 'input_ids'
        if hasattr(cls, '_tp_plan'):  # fix tp_plan
            cls._tp_plan = cls._tp_plan or {}
        return original(cls, *args, **kwargs)

    PreTrainedModel.from_pretrained = classmethod(_from_pretrained)

    try:
        yield
    finally:
        PreTrainedModel.from_pretrained = classmethod(original)
|
| 295 |
+
|
| 296 |
+
|
| 297 |
+
# Set once the MP + DDP monkey patches below have been installed.
_mp_ddp_patched = False


def patch_mp_ddp():
    """Patch ddp with device_map.

    After patching, the ddp can run with the device_map.
    This should be called before any training starts.

    The DDP / device-map patches are installed at most once per process (guarded by
    ``_mp_ddp_patched``). The ``Accelerator`` patches are applied on every call for
    which ``is_mp_ddp()`` or ``use_torchacc()`` is true.
    """
    global _mp_ddp_patched
    if is_mp_ddp() and not _mp_ddp_patched:
        _mp_ddp_patched = True
        from accelerate.utils.modeling import get_balanced_memory, infer_auto_device_map

        @wraps(infer_auto_device_map)
        def _infer_auto_device_map_patch(model: nn.Module,
                                         max_memory: Optional[Dict[Union[int, str], Union[int, str]]] = None,
                                         **kwargs) -> Dict[str, Union[int, str, torch.device]]:
            """The auxiliary function for supports MP + DDP. Monkey Patching.
            add feat in accelerate to support MP + DDP"""
            verbose = kwargs.pop('verbose', False)
            device_count = get_device_count()
            _, local_rank, _, local_world_size = get_dist_setting()
            # This rank takes every local_world_size-th device, starting at its local rank.
            own_devices = list(range(local_rank, device_count, local_world_size))
            # The incoming max_memory is ignored; it is recomputed for this rank's devices.
            budget = _sync_max_memory(_get_max_memory(own_devices))
            budget = get_balanced_memory(model, budget, low_zero=False, **kwargs)
            budget = {device: amount for device, amount in budget.items() if amount > 0}
            return infer_auto_device_map(model, budget, verbose=verbose, **kwargs)

        _old_ddp_init = DDP.__init__

        def _ddp_init_without_devices(self, model, device_ids, output_device, *args, **kwargs):
            # device_ids / output_device are accepted but deliberately not forwarded.
            return _old_ddp_init(self, model, *args, **kwargs)

        accelerate.accelerator.torch.nn.parallel.DistributedDataParallel.__init__ = _ddp_init_without_devices
        transformers.modeling_utils.get_balanced_memory = lambda *args, **kwargs: None
        transformers.modeling_utils.infer_auto_device_map = _infer_auto_device_map_patch

    if is_mp_ddp() or use_torchacc():
        _old_accelerator_init = trainer.Accelerator.__init__

        def _accelerator_init(self, device_placement=False, *args, **kwargs):
            # Same as the original constructor, but device_placement defaults to False.
            return _old_accelerator_init(self, device_placement=device_placement, *args, **kwargs)

        trainer.Accelerator.__init__ = _accelerator_init
        trainer.Accelerator.verify_device_map = lambda *args, **kwargs: False
|
| 337 |
+
|
| 338 |
+
|
| 339 |
+
@contextmanager
def patch_get_dynamic_module():
    """Temporarily run ``dynamic_module_utils.get_cached_module_file`` under ``safe_ddp_context``.

    Each call made inside the context is wrapped in
    ``safe_ddp_context(hash_id=str(pretrained_model_name_or_path))``. The original
    function is restored on exit, including when the body raises.
    """
    saved = dynamic_module_utils.get_cached_module_file

    def _guarded_get_cached_module_file(pretrained_model_name_or_path, *args, **kwargs):
        guard = safe_ddp_context(hash_id=str(pretrained_model_name_or_path))
        with guard:
            return saved(pretrained_model_name_or_path, *args, **kwargs)

    dynamic_module_utils.get_cached_module_file = _guarded_get_cached_module_file
    try:
        yield
    finally:
        dynamic_module_utils.get_cached_module_file = saved
|
| 352 |
+
|
| 353 |
+
|
| 354 |
+
@contextmanager
def patch_tp_plan():
    """Temporarily remove ``WORLD_SIZE`` from the environment.

    The patch is only applied when ``is_mp_ddp()`` is true, the installed transformers
    version is at least 4.50 and ``WORLD_SIZE`` is actually set; otherwise the context
    is a no-op. While active, the previous value is kept in ``_PATCH_WORLD_SIZE``
    (which stays set afterwards) and ``WORLD_SIZE`` is restored on exit, including
    when the body raises.
    """
    if not is_mp_ddp() or version.parse(transformers.__version__) < version.parse('4.50'):
        yield
        return
    world_size = os.environ.get('WORLD_SIZE')
    if world_size is None:
        # Nothing to hide; storing None into os.environ would raise TypeError.
        yield
        return
    os.environ['_PATCH_WORLD_SIZE'] = world_size
    os.environ.pop('WORLD_SIZE')
    try:
        yield
    finally:
        # Always restore, otherwise a failure inside the context would leave the
        # process without WORLD_SIZE.
        os.environ['WORLD_SIZE'] = world_size
|
ms-swift/swift/llm/sampling/distill_sampler.py
ADDED
|
@@ -0,0 +1,148 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
from copy import deepcopy
|
| 3 |
+
from typing import List, Optional
|
| 4 |
+
|
| 5 |
+
from openai import OpenAI
|
| 6 |
+
|
| 7 |
+
from swift.llm.infer.protocol import InferRequest, RequestConfig
|
| 8 |
+
from swift.llm.sampling.vanilla_sampler import VanillaSampler
|
| 9 |
+
from .utils import get_messages_md5
|
| 10 |
+
|
| 11 |
+
|
| 12 |
+
class OpenAI_Engine():
    """Thin wrapper around an ``OpenAI`` client that returns plain response strings."""

    def __init__(
        self,
        model: str,
        stream: bool = False,
        base_url: str = 'https://dashscope.aliyuncs.com/compatible-mode/v1',
        api_key: str = '',
        **kwargs,
    ):
        """Create the client.

        Args:
            model: Model name sent with every request.
            stream: Whether completions are requested in streaming mode.
            base_url: Endpoint passed to the ``OpenAI`` client.
            api_key: API key; when empty, the ``OPENAI_API_KEY`` environment variable is used.
            **kwargs: Extra keyword arguments forwarded to the ``OpenAI`` constructor.
        """
        self.model = model
        self.stream = stream
        self.client = OpenAI(api_key=api_key if api_key else os.getenv('OPENAI_API_KEY'), base_url=base_url, **kwargs)

    def infer(
        self,
        infer_requests: List[InferRequest],
        request_config: Optional[RequestConfig] = None,
    ):
        """Run the requests one by one and return one response string per request.

        When a completion carries reasoning content, the string has the form
        ``<think>{reasoning}</think>\\n\\n<answer>{content}</answer>``; otherwise it is
        the bare content.

        Args:
            infer_requests: Requests; the ``'messages'`` entry of each is sent as is.
            request_config: Its ``temperature``, ``top_p`` and ``max_tokens`` are read
                for every request, so despite the default it must not be ``None``
                when there is at least one request.

        Returns:
            A list of response strings, in request order.

        Raises:
            AssertionError: If a completion has no content.
        """
        resp_contents = []
        for infer_request in infer_requests:
            completion = self.client.chat.completions.create(
                model=self.model,
                messages=infer_request['messages'],
                temperature=request_config.temperature,
                top_p=request_config.top_p,
                max_tokens=request_config.max_tokens,
                stream=self.stream,
            )
            # Reset per request in both modes: previously the non-stream path left
            # reasoning_content unbound (or stale from the previous request) whenever
            # the message had no such attribute.
            reasoning_content = ''
            content = ''
            if self.stream:
                for chunk in completion:
                    if not chunk.choices:
                        continue
                    delta = chunk.choices[0].delta
                    reasoning_chunk = getattr(delta, 'reasoning_content', '')
                    answer_chunk = delta.content
                    if reasoning_chunk:
                        reasoning_content += reasoning_chunk
                    elif answer_chunk:
                        content += answer_chunk
            else:
                message = completion.choices[0].message
                reasoning_content = getattr(message, 'reasoning_content', '')
                # A missing content is normalized so it fails the emptiness check below
                # instead of raising TypeError from len(None).
                content = message.content or ''
            assert len(content) > 0, 'Empty completion'
            if reasoning_content:
                resp_content = f'<think>{reasoning_content}</think>\n\n<answer>{content}</answer>'
            else:
                resp_content = content
            resp_contents.append(resp_content)

        return resp_contents
|
| 67 |
+
|
| 68 |
+
|
| 69 |
+
class DistillSampler(VanillaSampler):
    """Sampler that obtains its completions from a remote endpoint through ``OpenAI_Engine``."""

    def __init__(self, *args, **kwargs):
        # super(VanillaSampler, self) starts the lookup *after* VanillaSampler in the MRO,
        # so VanillaSampler.__init__ itself is not run here.
        # NOTE(review): presumably this avoids building a local engine - confirm.
        super(VanillaSampler, self).__init__(*args, **kwargs)
        assert self.args.sampler_engine == 'client'
        _Engine = OpenAI_Engine
        self.infer_engine = _Engine(model=self.args.model, stream=self.args.stream, **self.args.engine_kwargs)
        self.infer_engine.strict = False
        self.caches = self.read_cache()

    def _prepare_model_tokenizer(self):
        """No-op override."""
        pass

    def _prepare_template(self):
        """No-op override."""
        pass

    def extract_choice(self, resp):
        """Format the first choice of ``resp`` as a response string.

        Returns ``<think>...</think>\\n\\n<answer>...</answer>`` when the message has
        non-empty reasoning content, otherwise the bare message content. This matches
        how ``OpenAI_Engine.infer`` formats responses; a ``None`` or empty
        ``reasoning_content`` no longer yields a ``<think>None</think>`` wrapper.
        """
        message = resp.choices[0].message
        reasoning_content = getattr(message, 'reasoning_content', None)
        if reasoning_content:
            return f'<think>{reasoning_content}</think>\n\n<answer>{message.content}</answer>'
        return message.content

    def _cached_choices(self, row):
        """Return the cached choices of ``row`` if the cache holds a complete set, else ``None``."""
        uuid = get_messages_md5(row)
        if uuid in self.caches:
            choices = self.caches[uuid]['choices']
            if len(choices) == self.args.num_return_sequences:
                return choices
        return None

    def generate(self, data):
        """Sample ``num_return_sequences`` completions for every row of ``data``.

        Rows with a complete cache entry reuse the cached choices; all other rows are
        sent to the inference engine. For those, the system prompt is replaced or
        inserted when ``self.args.system`` is set, and a trailing assistant message is
        dropped before sending.

        Args:
            data: Input converted to rows with ``self.convert_data_to_rows``.

        Returns:
            A list with one copy of each row, in input order, each carrying a
            ``'choices'`` list.
        """
        rows = self.convert_data_to_rows(data)
        num_sequences = self.args.num_return_sequences

        infer_requests = []
        for row in rows:
            row = deepcopy(row)
            messages = row['messages']
            # The cache key is computed before the messages are modified below.
            if self._cached_choices(row) is not None:
                continue
            if self.args.system:
                if messages and messages[0]['role'] == 'system':
                    messages[0]['content'] = self.args.system
                else:
                    messages.insert(0, {'role': 'system', 'content': self.args.system})
            if messages and messages[-1]['role'] == 'assistant':
                messages = messages[:-1]

            row['messages'] = messages
            infer_requests.extend(deepcopy(row) for _ in range(num_sequences))

        request_config = RequestConfig(
            max_tokens=self.args.max_new_tokens,
            temperature=self.args.temperature,
            top_k=self.args.top_k,
            top_p=self.args.top_p,
        )

        resp_list = []
        if len(infer_requests) > 0:
            resp_list = self.infer_engine.infer(infer_requests, request_config=request_config)

        resp_all = []
        # Index of the next uncached row; its responses occupy one block of
        # num_sequences consecutive entries in resp_list.
        uncached_seen = 0
        for row in rows:
            row = deepcopy(row)
            cached = self._cached_choices(row)
            if cached is not None:
                row['choices'] = cached
            else:
                start = num_sequences * uncached_seen
                row['choices'] = [resp_list[j] for j in range(start, start + num_sequences)]
                uncached_seen += 1
            resp_all.append(row)
        return resp_all
|