Student0809 commited on
Commit
636c5b4
·
verified ·
1 Parent(s): 14e1dca

Add files using upload-large-folder tool

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. ms-swift/.ipynb_checkpoints/requirements-checkpoint.txt +1 -0
  2. ms-swift/processed_data/processed_overlap5s_isoverlap.json +0 -0
  3. ms-swift/processed_data/processed_overlap5s_issilence.json +0 -0
  4. ms-swift/processed_data/processed_overlap5s_transcriptions.json +0 -0
  5. ms-swift/processed_data/processed_silence_issilence.json +0 -0
  6. ms-swift/silence_overlaps/700/original/overlap5s_speaker_segments.json +0 -0
  7. ms-swift/silence_overlaps/700/original/overlap5s_transcriptions.json +0 -0
  8. ms-swift/silence_overlaps/700/original/silence_issilence.json +0 -0
  9. ms-swift/silence_overlaps/700/original/silence_speaker_segments.json +0 -0
  10. ms-swift/silence_overlaps/700/original/silence_transcriptions.json +0 -0
  11. ms-swift/silence_overlaps/700/silence_isoverlap_train.json +1152 -0
  12. ms-swift/silence_overlaps/700/test/.ipynb_checkpoints/silence_isoverlaps_test-checkpoint.json +27 -0
  13. ms-swift/silence_overlaps/700/test/overlap5s_transcriptions_test.json +27 -0
  14. ms-swift/silence_overlaps/700/test/silence_isoverlaps_test.json +27 -0
  15. ms-swift/silence_overlaps/700/test/silence_issilence_test.json +27 -0
  16. ms-swift/silence_overlaps/700/train/.ipynb_checkpoints/silence_speaker_segments_train-checkpoint.json +0 -0
  17. ms-swift/silence_overlaps/700/train/silence_transcriptions_train.json +0 -0
  18. ms-swift/silence_overlaps/delete_transcript2.json +1 -0
  19. ms-swift/silence_overlaps/only_overlap/overlap5s_isoverlap_train.json +0 -0
  20. ms-swift/silence_overlaps/overlap5s_issilence.json +0 -0
  21. ms-swift/silence_overlaps/silence_transcriptions.json +0 -0
  22. ms-swift/silence_overlaps/test/.ipynb_checkpoints/test-checkpoint.json +566 -0
  23. ms-swift/silence_overlaps/test/test.json +566 -0
  24. ms-swift/swift/llm/dataset/__init__.py +35 -0
  25. ms-swift/swift/llm/dataset/__pycache__/loader.cpython-310.pyc +0 -0
  26. ms-swift/swift/llm/dataset/__pycache__/media.cpython-310.pyc +0 -0
  27. ms-swift/swift/llm/dataset/dataset/__init__.py +2 -0
  28. ms-swift/swift/llm/dataset/dataset/__pycache__/llm.cpython-310.pyc +0 -0
  29. ms-swift/swift/llm/dataset/preprocessor/extra.py +112 -0
  30. ms-swift/swift/llm/ds_config/zero2.json +35 -0
  31. ms-swift/swift/llm/ds_config/zero3.json +44 -0
  32. ms-swift/swift/llm/ds_config/zero3_offload.json +42 -0
  33. ms-swift/swift/llm/eval/__init__.py +2 -0
  34. ms-swift/swift/llm/eval/utils.py +53 -0
  35. ms-swift/swift/llm/export/__init__.py +5 -0
  36. ms-swift/swift/llm/infer/__init__.py +35 -0
  37. ms-swift/swift/llm/infer/infer_engine/__pycache__/utils.cpython-310.pyc +0 -0
  38. ms-swift/swift/llm/model/__pycache__/model_arch.cpython-310.pyc +0 -0
  39. ms-swift/swift/llm/model/model/__pycache__/gemma.cpython-310.pyc +0 -0
  40. ms-swift/swift/llm/model/model/__pycache__/glm.cpython-310.pyc +0 -0
  41. ms-swift/swift/llm/model/model/__pycache__/llama.cpython-310.pyc +0 -0
  42. ms-swift/swift/llm/model/model/__pycache__/llava.cpython-310.pyc +0 -0
  43. ms-swift/swift/llm/model/model/__pycache__/mistral.cpython-310.pyc +0 -0
  44. ms-swift/swift/llm/model/model/baichuan.py +147 -0
  45. ms-swift/swift/llm/model/model/mamba.py +41 -0
  46. ms-swift/swift/llm/model/model/stepfun.py +86 -0
  47. ms-swift/swift/llm/model/model/telechat.py +59 -0
  48. ms-swift/swift/llm/model/model/valley.py +82 -0
  49. ms-swift/swift/llm/model/patcher.py +363 -0
  50. ms-swift/swift/llm/sampling/distill_sampler.py +148 -0
ms-swift/.ipynb_checkpoints/requirements-checkpoint.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ -r requirements/framework.txt
ms-swift/processed_data/processed_overlap5s_isoverlap.json ADDED
The diff for this file is too large to render. See raw diff
 
ms-swift/processed_data/processed_overlap5s_issilence.json ADDED
The diff for this file is too large to render. See raw diff
 
ms-swift/processed_data/processed_overlap5s_transcriptions.json ADDED
The diff for this file is too large to render. See raw diff
 
ms-swift/processed_data/processed_silence_issilence.json ADDED
The diff for this file is too large to render. See raw diff
 
ms-swift/silence_overlaps/700/original/overlap5s_speaker_segments.json ADDED
The diff for this file is too large to render. See raw diff
 
ms-swift/silence_overlaps/700/original/overlap5s_transcriptions.json ADDED
The diff for this file is too large to render. See raw diff
 
ms-swift/silence_overlaps/700/original/silence_issilence.json ADDED
The diff for this file is too large to render. See raw diff
 
ms-swift/silence_overlaps/700/original/silence_speaker_segments.json ADDED
The diff for this file is too large to render. See raw diff
 
ms-swift/silence_overlaps/700/original/silence_transcriptions.json ADDED
The diff for this file is too large to render. See raw diff
 
ms-swift/silence_overlaps/700/silence_isoverlap_train.json ADDED
@@ -0,0 +1,1152 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--1101857.wav",
4
+ "key": "SODA_PROCESSED--train--1101857",
5
+ "model_output": "No significant overlaps found."
6
+ },
7
+ {
8
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--508884.wav",
9
+ "key": "SODA_PROCESSED--train--508884",
10
+ "model_output": "No significant overlaps found."
11
+ },
12
+ {
13
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--1113674.wav",
14
+ "key": "SODA_PROCESSED--train--1113674",
15
+ "model_output": "No significant overlaps found."
16
+ },
17
+ {
18
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--158293.wav",
19
+ "key": "SODA_PROCESSED--train--158293",
20
+ "model_output": "No significant overlaps found."
21
+ },
22
+ {
23
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--631363.wav",
24
+ "key": "SODA_PROCESSED--train--631363",
25
+ "model_output": "No significant overlaps found."
26
+ },
27
+ {
28
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--277322.wav",
29
+ "key": "SODA_PROCESSED--train--277322",
30
+ "model_output": "No significant overlaps found."
31
+ },
32
+ {
33
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--1131940.wav",
34
+ "key": "SODA_PROCESSED--train--1131940",
35
+ "model_output": "No significant overlaps found."
36
+ },
37
+ {
38
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--1108753.wav",
39
+ "key": "SODA_PROCESSED--train--1108753",
40
+ "model_output": "No significant overlaps found."
41
+ },
42
+ {
43
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--27924.wav",
44
+ "key": "SODA_PROCESSED--train--27924",
45
+ "model_output": "No significant overlaps found."
46
+ },
47
+ {
48
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--374749.wav",
49
+ "key": "SODA_PROCESSED--train--374749",
50
+ "model_output": "No significant overlaps found."
51
+ },
52
+ {
53
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--821468.wav",
54
+ "key": "SODA_PROCESSED--train--821468",
55
+ "model_output": "No significant overlaps found."
56
+ },
57
+ {
58
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--416516.wav",
59
+ "key": "SODA_PROCESSED--train--416516",
60
+ "model_output": "No significant overlaps found."
61
+ },
62
+ {
63
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--1029082.wav",
64
+ "key": "SODA_PROCESSED--train--1029082",
65
+ "model_output": "No significant overlaps found."
66
+ },
67
+ {
68
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--992151.wav",
69
+ "key": "SODA_PROCESSED--train--992151",
70
+ "model_output": "No significant overlaps found."
71
+ },
72
+ {
73
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--744708.wav",
74
+ "key": "SODA_PROCESSED--train--744708",
75
+ "model_output": "No significant overlaps found."
76
+ },
77
+ {
78
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--11862.wav",
79
+ "key": "SODA_PROCESSED--train--11862",
80
+ "model_output": "No significant overlaps found."
81
+ },
82
+ {
83
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--419304.wav",
84
+ "key": "SODA_PROCESSED--train--419304",
85
+ "model_output": "No significant overlaps found."
86
+ },
87
+ {
88
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--98673.wav",
89
+ "key": "SODA_PROCESSED--train--98673",
90
+ "model_output": "No significant overlaps found."
91
+ },
92
+ {
93
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--22719.wav",
94
+ "key": "SODA_PROCESSED--train--22719",
95
+ "model_output": "No significant overlaps found."
96
+ },
97
+ {
98
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--1028263.wav",
99
+ "key": "SODA_PROCESSED--train--1028263",
100
+ "model_output": "No significant overlaps found."
101
+ },
102
+ {
103
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--848051.wav",
104
+ "key": "SODA_PROCESSED--train--848051",
105
+ "model_output": "No significant overlaps found."
106
+ },
107
+ {
108
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--511668.wav",
109
+ "key": "SODA_PROCESSED--train--511668",
110
+ "model_output": "No significant overlaps found."
111
+ },
112
+ {
113
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--12047.wav",
114
+ "key": "SODA_PROCESSED--train--12047",
115
+ "model_output": "No significant overlaps found."
116
+ },
117
+ {
118
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--153751.wav",
119
+ "key": "SODA_PROCESSED--train--153751",
120
+ "model_output": "No significant overlaps found."
121
+ },
122
+ {
123
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--795559.wav",
124
+ "key": "SODA_PROCESSED--train--795559",
125
+ "model_output": "No significant overlaps found."
126
+ },
127
+ {
128
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--387024.wav",
129
+ "key": "SODA_PROCESSED--train--387024",
130
+ "model_output": "No significant overlaps found."
131
+ },
132
+ {
133
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--1168213.wav",
134
+ "key": "SODA_PROCESSED--train--1168213",
135
+ "model_output": "No significant overlaps found."
136
+ },
137
+ {
138
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--1123711.wav",
139
+ "key": "SODA_PROCESSED--train--1123711",
140
+ "model_output": "No significant overlaps found."
141
+ },
142
+ {
143
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--819618.wav",
144
+ "key": "SODA_PROCESSED--train--819618",
145
+ "model_output": "No significant overlaps found."
146
+ },
147
+ {
148
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--752118.wav",
149
+ "key": "SODA_PROCESSED--train--752118",
150
+ "model_output": "No significant overlaps found."
151
+ },
152
+ {
153
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--581770.wav",
154
+ "key": "SODA_PROCESSED--train--581770",
155
+ "model_output": "No significant overlaps found."
156
+ },
157
+ {
158
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--276032.wav",
159
+ "key": "SODA_PROCESSED--train--276032",
160
+ "model_output": "No significant overlaps found."
161
+ },
162
+ {
163
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--556475.wav",
164
+ "key": "SODA_PROCESSED--train--556475",
165
+ "model_output": "No significant overlaps found."
166
+ },
167
+ {
168
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--674667.wav",
169
+ "key": "SODA_PROCESSED--train--674667",
170
+ "model_output": "No significant overlaps found."
171
+ },
172
+ {
173
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--501206.wav",
174
+ "key": "SODA_PROCESSED--train--501206",
175
+ "model_output": "No significant overlaps found."
176
+ },
177
+ {
178
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--905725.wav",
179
+ "key": "SODA_PROCESSED--train--905725",
180
+ "model_output": "No significant overlaps found."
181
+ },
182
+ {
183
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--265829.wav",
184
+ "key": "SODA_PROCESSED--train--265829",
185
+ "model_output": "No significant overlaps found."
186
+ },
187
+ {
188
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--207527.wav",
189
+ "key": "SODA_PROCESSED--train--207527",
190
+ "model_output": "No significant overlaps found."
191
+ },
192
+ {
193
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--985415.wav",
194
+ "key": "SODA_PROCESSED--train--985415",
195
+ "model_output": "No significant overlaps found."
196
+ },
197
+ {
198
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--115102.wav",
199
+ "key": "SODA_PROCESSED--train--115102",
200
+ "model_output": "No significant overlaps found."
201
+ },
202
+ {
203
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--8820.wav",
204
+ "key": "SODA_PROCESSED--train--8820",
205
+ "model_output": "No significant overlaps found."
206
+ },
207
+ {
208
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--453454.wav",
209
+ "key": "SODA_PROCESSED--train--453454",
210
+ "model_output": "No significant overlaps found."
211
+ },
212
+ {
213
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--375003.wav",
214
+ "key": "SODA_PROCESSED--train--375003",
215
+ "model_output": "No significant overlaps found."
216
+ },
217
+ {
218
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--757426.wav",
219
+ "key": "SODA_PROCESSED--train--757426",
220
+ "model_output": "No significant overlaps found."
221
+ },
222
+ {
223
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--202914.wav",
224
+ "key": "SODA_PROCESSED--train--202914",
225
+ "model_output": "No significant overlaps found."
226
+ },
227
+ {
228
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--1007416.wav",
229
+ "key": "SODA_PROCESSED--train--1007416",
230
+ "model_output": "No significant overlaps found."
231
+ },
232
+ {
233
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--193891.wav",
234
+ "key": "SODA_PROCESSED--train--193891",
235
+ "model_output": "No significant overlaps found."
236
+ },
237
+ {
238
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--96343.wav",
239
+ "key": "SODA_PROCESSED--train--96343",
240
+ "model_output": "No significant overlaps found."
241
+ },
242
+ {
243
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--1031234.wav",
244
+ "key": "SODA_PROCESSED--train--1031234",
245
+ "model_output": "No significant overlaps found."
246
+ },
247
+ {
248
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--798455.wav",
249
+ "key": "SODA_PROCESSED--train--798455",
250
+ "model_output": "No significant overlaps found."
251
+ },
252
+ {
253
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--438636.wav",
254
+ "key": "SODA_PROCESSED--train--438636",
255
+ "model_output": "No significant overlaps found."
256
+ },
257
+ {
258
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--776766.wav",
259
+ "key": "SODA_PROCESSED--train--776766",
260
+ "model_output": "No significant overlaps found."
261
+ },
262
+ {
263
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--691830.wav",
264
+ "key": "SODA_PROCESSED--train--691830",
265
+ "model_output": "No significant overlaps found."
266
+ },
267
+ {
268
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--524306.wav",
269
+ "key": "SODA_PROCESSED--train--524306",
270
+ "model_output": "No significant overlaps found."
271
+ },
272
+ {
273
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--471264.wav",
274
+ "key": "SODA_PROCESSED--train--471264",
275
+ "model_output": "No significant overlaps found."
276
+ },
277
+ {
278
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--421778.wav",
279
+ "key": "SODA_PROCESSED--train--421778",
280
+ "model_output": "No significant overlaps found."
281
+ },
282
+ {
283
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--541347.wav",
284
+ "key": "SODA_PROCESSED--train--541347",
285
+ "model_output": "No significant overlaps found."
286
+ },
287
+ {
288
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--1084325.wav",
289
+ "key": "SODA_PROCESSED--train--1084325",
290
+ "model_output": "No significant overlaps found."
291
+ },
292
+ {
293
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--29039.wav",
294
+ "key": "SODA_PROCESSED--train--29039",
295
+ "model_output": "No significant overlaps found."
296
+ },
297
+ {
298
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--1182464.wav",
299
+ "key": "SODA_PROCESSED--train--1182464",
300
+ "model_output": "No significant overlaps found."
301
+ },
302
+ {
303
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--735517.wav",
304
+ "key": "SODA_PROCESSED--train--735517",
305
+ "model_output": "No significant overlaps found."
306
+ },
307
+ {
308
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--417260.wav",
309
+ "key": "SODA_PROCESSED--train--417260",
310
+ "model_output": "No significant overlaps found."
311
+ },
312
+ {
313
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--784738.wav",
314
+ "key": "SODA_PROCESSED--train--784738",
315
+ "model_output": "No significant overlaps found."
316
+ },
317
+ {
318
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--303363.wav",
319
+ "key": "SODA_PROCESSED--train--303363",
320
+ "model_output": "No significant overlaps found."
321
+ },
322
+ {
323
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--795181.wav",
324
+ "key": "SODA_PROCESSED--train--795181",
325
+ "model_output": "No significant overlaps found."
326
+ },
327
+ {
328
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--33760.wav",
329
+ "key": "SODA_PROCESSED--train--33760",
330
+ "model_output": "No significant overlaps found."
331
+ },
332
+ {
333
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--126878.wav",
334
+ "key": "SODA_PROCESSED--train--126878",
335
+ "model_output": "No significant overlaps found."
336
+ },
337
+ {
338
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--317167.wav",
339
+ "key": "SODA_PROCESSED--train--317167",
340
+ "model_output": "No significant overlaps found."
341
+ },
342
+ {
343
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--463322.wav",
344
+ "key": "SODA_PROCESSED--train--463322",
345
+ "model_output": "No significant overlaps found."
346
+ },
347
+ {
348
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--51285.wav",
349
+ "key": "SODA_PROCESSED--train--51285",
350
+ "model_output": "No significant overlaps found."
351
+ },
352
+ {
353
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--1081079.wav",
354
+ "key": "SODA_PROCESSED--train--1081079",
355
+ "model_output": "No significant overlaps found."
356
+ },
357
+ {
358
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--58199.wav",
359
+ "key": "SODA_PROCESSED--train--58199",
360
+ "model_output": "No significant overlaps found."
361
+ },
362
+ {
363
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--1017701.wav",
364
+ "key": "SODA_PROCESSED--train--1017701",
365
+ "model_output": "No significant overlaps found."
366
+ },
367
+ {
368
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--762267.wav",
369
+ "key": "SODA_PROCESSED--train--762267",
370
+ "model_output": "No significant overlaps found."
371
+ },
372
+ {
373
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--4948.wav",
374
+ "key": "SODA_PROCESSED--train--4948",
375
+ "model_output": "No significant overlaps found."
376
+ },
377
+ {
378
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--737676.wav",
379
+ "key": "SODA_PROCESSED--train--737676",
380
+ "model_output": "No significant overlaps found."
381
+ },
382
+ {
383
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--606362.wav",
384
+ "key": "SODA_PROCESSED--train--606362",
385
+ "model_output": "No significant overlaps found."
386
+ },
387
+ {
388
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--674832.wav",
389
+ "key": "SODA_PROCESSED--train--674832",
390
+ "model_output": "No significant overlaps found."
391
+ },
392
+ {
393
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--588465.wav",
394
+ "key": "SODA_PROCESSED--train--588465",
395
+ "model_output": "No significant overlaps found."
396
+ },
397
+ {
398
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--386163.wav",
399
+ "key": "SODA_PROCESSED--train--386163",
400
+ "model_output": "No significant overlaps found."
401
+ },
402
+ {
403
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--421624.wav",
404
+ "key": "SODA_PROCESSED--train--421624",
405
+ "model_output": "No significant overlaps found."
406
+ },
407
+ {
408
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--977126.wav",
409
+ "key": "SODA_PROCESSED--train--977126",
410
+ "model_output": "No significant overlaps found."
411
+ },
412
+ {
413
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--932676.wav",
414
+ "key": "SODA_PROCESSED--train--932676",
415
+ "model_output": "No significant overlaps found."
416
+ },
417
+ {
418
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--315768.wav",
419
+ "key": "SODA_PROCESSED--train--315768",
420
+ "model_output": "No significant overlaps found."
421
+ },
422
+ {
423
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--939669.wav",
424
+ "key": "SODA_PROCESSED--train--939669",
425
+ "model_output": "No significant overlaps found."
426
+ },
427
+ {
428
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--1174912.wav",
429
+ "key": "SODA_PROCESSED--train--1174912",
430
+ "model_output": "No significant overlaps found."
431
+ },
432
+ {
433
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--1023331.wav",
434
+ "key": "SODA_PROCESSED--train--1023331",
435
+ "model_output": "No significant overlaps found."
436
+ },
437
+ {
438
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--144310.wav",
439
+ "key": "SODA_PROCESSED--train--144310",
440
+ "model_output": "No significant overlaps found."
441
+ },
442
+ {
443
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--1011922.wav",
444
+ "key": "SODA_PROCESSED--train--1011922",
445
+ "model_output": "No significant overlaps found."
446
+ },
447
+ {
448
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--713730.wav",
449
+ "key": "SODA_PROCESSED--train--713730",
450
+ "model_output": "No significant overlaps found."
451
+ },
452
+ {
453
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--708040.wav",
454
+ "key": "SODA_PROCESSED--train--708040",
455
+ "model_output": "No significant overlaps found."
456
+ },
457
+ {
458
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--860576.wav",
459
+ "key": "SODA_PROCESSED--train--860576",
460
+ "model_output": "No significant overlaps found."
461
+ },
462
+ {
463
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--1001007.wav",
464
+ "key": "SODA_PROCESSED--train--1001007",
465
+ "model_output": "No significant overlaps found."
466
+ },
467
+ {
468
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--1166623.wav",
469
+ "key": "SODA_PROCESSED--train--1166623",
470
+ "model_output": "No significant overlaps found."
471
+ },
472
+ {
473
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--372789.wav",
474
+ "key": "SODA_PROCESSED--train--372789",
475
+ "model_output": "No significant overlaps found."
476
+ },
477
+ {
478
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--468603.wav",
479
+ "key": "SODA_PROCESSED--train--468603",
480
+ "model_output": "No significant overlaps found."
481
+ },
482
+ {
483
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--233562.wav",
484
+ "key": "SODA_PROCESSED--train--233562",
485
+ "model_output": "No significant overlaps found."
486
+ },
487
+ {
488
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--476626.wav",
489
+ "key": "SODA_PROCESSED--train--476626",
490
+ "model_output": "No significant overlaps found."
491
+ },
492
+ {
493
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--49462.wav",
494
+ "key": "SODA_PROCESSED--train--49462",
495
+ "model_output": "No significant overlaps found."
496
+ },
497
+ {
498
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--303336.wav",
499
+ "key": "SODA_PROCESSED--train--303336",
500
+ "model_output": "No significant overlaps found."
501
+ },
502
+ {
503
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--221358.wav",
504
+ "key": "SODA_PROCESSED--train--221358",
505
+ "model_output": "No significant overlaps found."
506
+ },
507
+ {
508
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--843615.wav",
509
+ "key": "SODA_PROCESSED--train--843615",
510
+ "model_output": "No significant overlaps found."
511
+ },
512
+ {
513
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--873625.wav",
514
+ "key": "SODA_PROCESSED--train--873625",
515
+ "model_output": "No significant overlaps found."
516
+ },
517
+ {
518
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--4814.wav",
519
+ "key": "SODA_PROCESSED--train--4814",
520
+ "model_output": "No significant overlaps found."
521
+ },
522
+ {
523
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--280675.wav",
524
+ "key": "SODA_PROCESSED--train--280675",
525
+ "model_output": "No significant overlaps found."
526
+ },
527
+ {
528
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--1132437.wav",
529
+ "key": "SODA_PROCESSED--train--1132437",
530
+ "model_output": "No significant overlaps found."
531
+ },
532
+ {
533
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--650705.wav",
534
+ "key": "SODA_PROCESSED--train--650705",
535
+ "model_output": "No significant overlaps found."
536
+ },
537
+ {
538
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--1186756.wav",
539
+ "key": "SODA_PROCESSED--train--1186756",
540
+ "model_output": "No significant overlaps found."
541
+ },
542
+ {
543
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--108309.wav",
544
+ "key": "SODA_PROCESSED--train--108309",
545
+ "model_output": "No significant overlaps found."
546
+ },
547
+ {
548
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--82238.wav",
549
+ "key": "SODA_PROCESSED--train--82238",
550
+ "model_output": "No significant overlaps found."
551
+ },
552
+ {
553
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--616846.wav",
554
+ "key": "SODA_PROCESSED--train--616846",
555
+ "model_output": "No significant overlaps found."
556
+ },
557
+ {
558
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--61606.wav",
559
+ "key": "SODA_PROCESSED--train--61606",
560
+ "model_output": "No significant overlaps found."
561
+ },
562
+ {
563
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--370577.wav",
564
+ "key": "SODA_PROCESSED--train--370577",
565
+ "model_output": "No significant overlaps found."
566
+ },
567
+ {
568
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--69581.wav",
569
+ "key": "SODA_PROCESSED--train--69581",
570
+ "model_output": "No significant overlaps found."
571
+ },
572
+ {
573
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--180962.wav",
574
+ "key": "SODA_PROCESSED--train--180962",
575
+ "model_output": "No significant overlaps found."
576
+ },
577
+ {
578
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--578986.wav",
579
+ "key": "SODA_PROCESSED--train--578986",
580
+ "model_output": "No significant overlaps found."
581
+ },
582
+ {
583
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--132857.wav",
584
+ "key": "SODA_PROCESSED--train--132857",
585
+ "model_output": "No significant overlaps found."
586
+ },
587
+ {
588
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--188417.wav",
589
+ "key": "SODA_PROCESSED--train--188417",
590
+ "model_output": "No significant overlaps found."
591
+ },
592
+ {
593
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--771154.wav",
594
+ "key": "SODA_PROCESSED--train--771154",
595
+ "model_output": "No significant overlaps found."
596
+ },
597
+ {
598
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--720445.wav",
599
+ "key": "SODA_PROCESSED--train--720445",
600
+ "model_output": "No significant overlaps found."
601
+ },
602
+ {
603
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--514225.wav",
604
+ "key": "SODA_PROCESSED--train--514225",
605
+ "model_output": "No significant overlaps found."
606
+ },
607
+ {
608
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--815822.wav",
609
+ "key": "SODA_PROCESSED--train--815822",
610
+ "model_output": "No significant overlaps found."
611
+ },
612
+ {
613
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--761001.wav",
614
+ "key": "SODA_PROCESSED--train--761001",
615
+ "model_output": "No significant overlaps found."
616
+ },
617
+ {
618
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--1061857.wav",
619
+ "key": "SODA_PROCESSED--train--1061857",
620
+ "model_output": "No significant overlaps found."
621
+ },
622
+ {
623
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--475793.wav",
624
+ "key": "SODA_PROCESSED--train--475793",
625
+ "model_output": "No significant overlaps found."
626
+ },
627
+ {
628
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--406352.wav",
629
+ "key": "SODA_PROCESSED--train--406352",
630
+ "model_output": "No significant overlaps found."
631
+ },
632
+ {
633
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--486716.wav",
634
+ "key": "SODA_PROCESSED--train--486716",
635
+ "model_output": "No significant overlaps found."
636
+ },
637
+ {
638
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--468879.wav",
639
+ "key": "SODA_PROCESSED--train--468879",
640
+ "model_output": "No significant overlaps found."
641
+ },
642
+ {
643
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--338832.wav",
644
+ "key": "SODA_PROCESSED--train--338832",
645
+ "model_output": "No significant overlaps found."
646
+ },
647
+ {
648
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--845126.wav",
649
+ "key": "SODA_PROCESSED--train--845126",
650
+ "model_output": "No significant overlaps found."
651
+ },
652
+ {
653
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--490986.wav",
654
+ "key": "SODA_PROCESSED--train--490986",
655
+ "model_output": "No significant overlaps found."
656
+ },
657
+ {
658
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--1128813.wav",
659
+ "key": "SODA_PROCESSED--train--1128813",
660
+ "model_output": "No significant overlaps found."
661
+ },
662
+ {
663
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--193134.wav",
664
+ "key": "SODA_PROCESSED--train--193134",
665
+ "model_output": "No significant overlaps found."
666
+ },
667
+ {
668
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--258235.wav",
669
+ "key": "SODA_PROCESSED--train--258235",
670
+ "model_output": "No significant overlaps found."
671
+ },
672
+ {
673
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--895260.wav",
674
+ "key": "SODA_PROCESSED--train--895260",
675
+ "model_output": "No significant overlaps found."
676
+ },
677
+ {
678
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--119322.wav",
679
+ "key": "SODA_PROCESSED--train--119322",
680
+ "model_output": "No significant overlaps found."
681
+ },
682
+ {
683
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--413405.wav",
684
+ "key": "SODA_PROCESSED--train--413405",
685
+ "model_output": "No significant overlaps found."
686
+ },
687
+ {
688
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--346041.wav",
689
+ "key": "SODA_PROCESSED--train--346041",
690
+ "model_output": "No significant overlaps found."
691
+ },
692
+ {
693
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--718092.wav",
694
+ "key": "SODA_PROCESSED--train--718092",
695
+ "model_output": "No significant overlaps found."
696
+ },
697
+ {
698
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--218634.wav",
699
+ "key": "SODA_PROCESSED--train--218634",
700
+ "model_output": "No significant overlaps found."
701
+ },
702
+ {
703
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--835488.wav",
704
+ "key": "SODA_PROCESSED--train--835488",
705
+ "model_output": "No significant overlaps found."
706
+ },
707
+ {
708
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--113543.wav",
709
+ "key": "SODA_PROCESSED--train--113543",
710
+ "model_output": "No significant overlaps found."
711
+ },
712
+ {
713
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--869455.wav",
714
+ "key": "SODA_PROCESSED--train--869455",
715
+ "model_output": "No significant overlaps found."
716
+ },
717
+ {
718
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--330048.wav",
719
+ "key": "SODA_PROCESSED--train--330048",
720
+ "model_output": "No significant overlaps found."
721
+ },
722
+ {
723
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--766234.wav",
724
+ "key": "SODA_PROCESSED--train--766234",
725
+ "model_output": "No significant overlaps found."
726
+ },
727
+ {
728
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--535368.wav",
729
+ "key": "SODA_PROCESSED--train--535368",
730
+ "model_output": "No significant overlaps found."
731
+ },
732
+ {
733
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--908444.wav",
734
+ "key": "SODA_PROCESSED--train--908444",
735
+ "model_output": "No significant overlaps found."
736
+ },
737
+ {
738
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--748910.wav",
739
+ "key": "SODA_PROCESSED--train--748910",
740
+ "model_output": "No significant overlaps found."
741
+ },
742
+ {
743
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--525710.wav",
744
+ "key": "SODA_PROCESSED--train--525710",
745
+ "model_output": "No significant overlaps found."
746
+ },
747
+ {
748
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--399572.wav",
749
+ "key": "SODA_PROCESSED--train--399572",
750
+ "model_output": "No significant overlaps found."
751
+ },
752
+ {
753
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--737726.wav",
754
+ "key": "SODA_PROCESSED--train--737726",
755
+ "model_output": "No significant overlaps found."
756
+ },
757
+ {
758
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--44625.wav",
759
+ "key": "SODA_PROCESSED--train--44625",
760
+ "model_output": "No significant overlaps found."
761
+ },
762
+ {
763
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--1095086.wav",
764
+ "key": "SODA_PROCESSED--train--1095086",
765
+ "model_output": "No significant overlaps found."
766
+ },
767
+ {
768
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--269886.wav",
769
+ "key": "SODA_PROCESSED--train--269886",
770
+ "model_output": "No significant overlaps found."
771
+ },
772
+ {
773
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--596068.wav",
774
+ "key": "SODA_PROCESSED--train--596068",
775
+ "model_output": "No significant overlaps found."
776
+ },
777
+ {
778
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--16779.wav",
779
+ "key": "SODA_PROCESSED--train--16779",
780
+ "model_output": "No significant overlaps found."
781
+ },
782
+ {
783
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--532510.wav",
784
+ "key": "SODA_PROCESSED--train--532510",
785
+ "model_output": "No significant overlaps found."
786
+ },
787
+ {
788
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--68508.wav",
789
+ "key": "SODA_PROCESSED--train--68508",
790
+ "model_output": "No significant overlaps found."
791
+ },
792
+ {
793
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--162106.wav",
794
+ "key": "SODA_PROCESSED--train--162106",
795
+ "model_output": "No significant overlaps found."
796
+ },
797
+ {
798
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--831005.wav",
799
+ "key": "SODA_PROCESSED--train--831005",
800
+ "model_output": "No significant overlaps found."
801
+ },
802
+ {
803
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--509788.wav",
804
+ "key": "SODA_PROCESSED--train--509788",
805
+ "model_output": "No significant overlaps found."
806
+ },
807
+ {
808
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--489519.wav",
809
+ "key": "SODA_PROCESSED--train--489519",
810
+ "model_output": "No significant overlaps found."
811
+ },
812
+ {
813
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--1020087.wav",
814
+ "key": "SODA_PROCESSED--train--1020087",
815
+ "model_output": "No significant overlaps found."
816
+ },
817
+ {
818
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--1050427.wav",
819
+ "key": "SODA_PROCESSED--train--1050427",
820
+ "model_output": "No significant overlaps found."
821
+ },
822
+ {
823
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--842885.wav",
824
+ "key": "SODA_PROCESSED--train--842885",
825
+ "model_output": "No significant overlaps found."
826
+ },
827
+ {
828
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--166191.wav",
829
+ "key": "SODA_PROCESSED--train--166191",
830
+ "model_output": "No significant overlaps found."
831
+ },
832
+ {
833
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--826028.wav",
834
+ "key": "SODA_PROCESSED--train--826028",
835
+ "model_output": "No significant overlaps found."
836
+ },
837
+ {
838
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--715956.wav",
839
+ "key": "SODA_PROCESSED--train--715956",
840
+ "model_output": "No significant overlaps found."
841
+ },
842
+ {
843
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--967872.wav",
844
+ "key": "SODA_PROCESSED--train--967872",
845
+ "model_output": "No significant overlaps found."
846
+ },
847
+ {
848
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--277060.wav",
849
+ "key": "SODA_PROCESSED--train--277060",
850
+ "model_output": "No significant overlaps found."
851
+ },
852
+ {
853
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--803822.wav",
854
+ "key": "SODA_PROCESSED--train--803822",
855
+ "model_output": "No significant overlaps found."
856
+ },
857
+ {
858
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--928982.wav",
859
+ "key": "SODA_PROCESSED--train--928982",
860
+ "model_output": "No significant overlaps found."
861
+ },
862
+ {
863
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--371354.wav",
864
+ "key": "SODA_PROCESSED--train--371354",
865
+ "model_output": "No significant overlaps found."
866
+ },
867
+ {
868
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--12295.wav",
869
+ "key": "SODA_PROCESSED--train--12295",
870
+ "model_output": "No significant overlaps found."
871
+ },
872
+ {
873
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--1030451.wav",
874
+ "key": "SODA_PROCESSED--train--1030451",
875
+ "model_output": "No significant overlaps found."
876
+ },
877
+ {
878
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--168398.wav",
879
+ "key": "SODA_PROCESSED--train--168398",
880
+ "model_output": "No significant overlaps found."
881
+ },
882
+ {
883
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--556505.wav",
884
+ "key": "SODA_PROCESSED--train--556505",
885
+ "model_output": "No significant overlaps found."
886
+ },
887
+ {
888
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--123906.wav",
889
+ "key": "SODA_PROCESSED--train--123906",
890
+ "model_output": "No significant overlaps found."
891
+ },
892
+ {
893
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--1120331.wav",
894
+ "key": "SODA_PROCESSED--train--1120331",
895
+ "model_output": "No significant overlaps found."
896
+ },
897
+ {
898
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--121129.wav",
899
+ "key": "SODA_PROCESSED--train--121129",
900
+ "model_output": "No significant overlaps found."
901
+ },
902
+ {
903
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--690063.wav",
904
+ "key": "SODA_PROCESSED--train--690063",
905
+ "model_output": "No significant overlaps found."
906
+ },
907
+ {
908
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--334902.wav",
909
+ "key": "SODA_PROCESSED--train--334902",
910
+ "model_output": "No significant overlaps found."
911
+ },
912
+ {
913
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--442672.wav",
914
+ "key": "SODA_PROCESSED--train--442672",
915
+ "model_output": "No significant overlaps found."
916
+ },
917
+ {
918
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--640494.wav",
919
+ "key": "SODA_PROCESSED--train--640494",
920
+ "model_output": "No significant overlaps found."
921
+ },
922
+ {
923
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--171463.wav",
924
+ "key": "SODA_PROCESSED--train--171463",
925
+ "model_output": "No significant overlaps found."
926
+ },
927
+ {
928
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--565809.wav",
929
+ "key": "SODA_PROCESSED--train--565809",
930
+ "model_output": "No significant overlaps found."
931
+ },
932
+ {
933
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--329396.wav",
934
+ "key": "SODA_PROCESSED--train--329396",
935
+ "model_output": "No significant overlaps found."
936
+ },
937
+ {
938
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--1090942.wav",
939
+ "key": "SODA_PROCESSED--train--1090942",
940
+ "model_output": "No significant overlaps found."
941
+ },
942
+ {
943
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--980776.wav",
944
+ "key": "SODA_PROCESSED--train--980776",
945
+ "model_output": "No significant overlaps found."
946
+ },
947
+ {
948
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--29858.wav",
949
+ "key": "SODA_PROCESSED--train--29858",
950
+ "model_output": "No significant overlaps found."
951
+ },
952
+ {
953
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--596349.wav",
954
+ "key": "SODA_PROCESSED--train--596349",
955
+ "model_output": "No significant overlaps found."
956
+ },
957
+ {
958
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--604536.wav",
959
+ "key": "SODA_PROCESSED--train--604536",
960
+ "model_output": "No significant overlaps found."
961
+ },
962
+ {
963
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--500115.wav",
964
+ "key": "SODA_PROCESSED--train--500115",
965
+ "model_output": "No significant overlaps found."
966
+ },
967
+ {
968
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--605295.wav",
969
+ "key": "SODA_PROCESSED--train--605295",
970
+ "model_output": "No significant overlaps found."
971
+ },
972
+ {
973
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--310941.wav",
974
+ "key": "SODA_PROCESSED--train--310941",
975
+ "model_output": "No significant overlaps found."
976
+ },
977
+ {
978
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--1051089.wav",
979
+ "key": "SODA_PROCESSED--train--1051089",
980
+ "model_output": "No significant overlaps found."
981
+ },
982
+ {
983
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--187351.wav",
984
+ "key": "SODA_PROCESSED--train--187351",
985
+ "model_output": "No significant overlaps found."
986
+ },
987
+ {
988
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--645254.wav",
989
+ "key": "SODA_PROCESSED--train--645254",
990
+ "model_output": "No significant overlaps found."
991
+ },
992
+ {
993
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--1066203.wav",
994
+ "key": "SODA_PROCESSED--train--1066203",
995
+ "model_output": "No significant overlaps found."
996
+ },
997
+ {
998
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--913166.wav",
999
+ "key": "SODA_PROCESSED--train--913166",
1000
+ "model_output": "No significant overlaps found."
1001
+ },
1002
+ {
1003
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--583204.wav",
1004
+ "key": "SODA_PROCESSED--train--583204",
1005
+ "model_output": "No significant overlaps found."
1006
+ },
1007
+ {
1008
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--575640.wav",
1009
+ "key": "SODA_PROCESSED--train--575640",
1010
+ "model_output": "No significant overlaps found."
1011
+ },
1012
+ {
1013
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--109428.wav",
1014
+ "key": "SODA_PROCESSED--train--109428",
1015
+ "model_output": "No significant overlaps found."
1016
+ },
1017
+ {
1018
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--246434.wav",
1019
+ "key": "SODA_PROCESSED--train--246434",
1020
+ "model_output": "No significant overlaps found."
1021
+ },
1022
+ {
1023
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--977434.wav",
1024
+ "key": "SODA_PROCESSED--train--977434",
1025
+ "model_output": "No significant overlaps found."
1026
+ },
1027
+ {
1028
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--665430.wav",
1029
+ "key": "SODA_PROCESSED--train--665430",
1030
+ "model_output": "No significant overlaps found."
1031
+ },
1032
+ {
1033
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--960193.wav",
1034
+ "key": "SODA_PROCESSED--train--960193",
1035
+ "model_output": "No significant overlaps found."
1036
+ },
1037
+ {
1038
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--31287.wav",
1039
+ "key": "SODA_PROCESSED--train--31287",
1040
+ "model_output": "No significant overlaps found."
1041
+ },
1042
+ {
1043
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--254497.wav",
1044
+ "key": "SODA_PROCESSED--train--254497",
1045
+ "model_output": "No significant overlaps found."
1046
+ },
1047
+ {
1048
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--774546.wav",
1049
+ "key": "SODA_PROCESSED--train--774546",
1050
+ "model_output": "No significant overlaps found."
1051
+ },
1052
+ {
1053
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--273875.wav",
1054
+ "key": "SODA_PROCESSED--train--273875",
1055
+ "model_output": "No significant overlaps found."
1056
+ },
1057
+ {
1058
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--822773.wav",
1059
+ "key": "SODA_PROCESSED--train--822773",
1060
+ "model_output": "No significant overlaps found."
1061
+ },
1062
+ {
1063
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--1052554.wav",
1064
+ "key": "SODA_PROCESSED--train--1052554",
1065
+ "model_output": "No significant overlaps found."
1066
+ },
1067
+ {
1068
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--179972.wav",
1069
+ "key": "SODA_PROCESSED--train--179972",
1070
+ "model_output": "No significant overlaps found."
1071
+ },
1072
+ {
1073
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--1117467.wav",
1074
+ "key": "SODA_PROCESSED--train--1117467",
1075
+ "model_output": "No significant overlaps found."
1076
+ },
1077
+ {
1078
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--961025.wav",
1079
+ "key": "SODA_PROCESSED--train--961025",
1080
+ "model_output": "No significant overlaps found."
1081
+ },
1082
+ {
1083
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--923496.wav",
1084
+ "key": "SODA_PROCESSED--train--923496",
1085
+ "model_output": "No significant overlaps found."
1086
+ },
1087
+ {
1088
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--41171.wav",
1089
+ "key": "SODA_PROCESSED--train--41171",
1090
+ "model_output": "No significant overlaps found."
1091
+ },
1092
+ {
1093
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--679971.wav",
1094
+ "key": "SODA_PROCESSED--train--679971",
1095
+ "model_output": "No significant overlaps found."
1096
+ },
1097
+ {
1098
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--876910.wav",
1099
+ "key": "SODA_PROCESSED--train--876910",
1100
+ "model_output": "No significant overlaps found."
1101
+ },
1102
+ {
1103
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--617278.wav",
1104
+ "key": "SODA_PROCESSED--train--617278",
1105
+ "model_output": "No significant overlaps found."
1106
+ },
1107
+ {
1108
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--463700.wav",
1109
+ "key": "SODA_PROCESSED--train--463700",
1110
+ "model_output": "No significant overlaps found."
1111
+ },
1112
+ {
1113
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--1186623.wav",
1114
+ "key": "SODA_PROCESSED--train--1186623",
1115
+ "model_output": "No significant overlaps found."
1116
+ },
1117
+ {
1118
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--1076109.wav",
1119
+ "key": "SODA_PROCESSED--train--1076109",
1120
+ "model_output": "No significant overlaps found."
1121
+ },
1122
+ {
1123
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--414445.wav",
1124
+ "key": "SODA_PROCESSED--train--414445",
1125
+ "model_output": "No significant overlaps found."
1126
+ },
1127
+ {
1128
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--546350.wav",
1129
+ "key": "SODA_PROCESSED--train--546350",
1130
+ "model_output": "No significant overlaps found."
1131
+ },
1132
+ {
1133
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--1144076.wav",
1134
+ "key": "SODA_PROCESSED--train--1144076",
1135
+ "model_output": "No significant overlaps found."
1136
+ },
1137
+ {
1138
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--104948.wav",
1139
+ "key": "SODA_PROCESSED--train--104948",
1140
+ "model_output": "No significant overlaps found."
1141
+ },
1142
+ {
1143
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--836740.wav",
1144
+ "key": "SODA_PROCESSED--train--836740",
1145
+ "model_output": "No significant overlaps found."
1146
+ },
1147
+ {
1148
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--437951.wav",
1149
+ "key": "SODA_PROCESSED--train--437951",
1150
+ "model_output": "No significant overlaps found."
1151
+ }
1152
+ ]
ms-swift/silence_overlaps/700/test/.ipynb_checkpoints/silence_isoverlaps_test-checkpoint.json ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--484021.wav",
4
+ "key": "SODA_PROCESSED--train--484021",
5
+ "model_output": "No significant overlaps found."
6
+ },
7
+ {
8
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--934782.wav",
9
+ "key": "SODA_PROCESSED--train--934782",
10
+ "model_output": "No significant overlaps found."
11
+ },
12
+ {
13
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--1106201.wav",
14
+ "key": "SODA_PROCESSED--train--1106201",
15
+ "model_output": "No significant overlaps found."
16
+ },
17
+ {
18
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--1007774.wav",
19
+ "key": "SODA_PROCESSED--train--1007774",
20
+ "model_output": "No significant overlaps found."
21
+ },
22
+ {
23
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--708343.wav",
24
+ "key": "SODA_PROCESSED--train--708343",
25
+ "model_output": "No significant overlaps found."
26
+ }
27
+ ]
ms-swift/silence_overlaps/700/test/overlap5s_transcriptions_test.json ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "key": "SODA_PROCESSED--train--730163",
4
+ "audio_url": "/root/autodl-tmp/output_overlapslong/newoverlapjson/overlap5s700/SODA_PROCESSED--train--730163.wav",
5
+ "model_output": "[00:00 - 00:01] Speaker A: Hey Cora, what's up?\n[00:01 - 00:04] Speaker B: Not much, just hanging out. What about you?\n[00:04 - 00:15] Speaker A: I'm good, just hanging out with some friends. You should come over if you're free. We're having a great time playing board games and catching up on all the latest news and gossip.\n[00:08 - 00:12] Speaker B: Wait, where are you guys hanging out? I might be more tempted if it's somewhere close.\n[00:15 - 00:19] Speaker A: We're at the new café downtown. You should come join us!\n[00:19 - 00:21] Speaker B: Nah, I don't really feel like it.\n[00:22 - 00:30] Speaker A: Come on, it'll be fun! I'll buy you a coffee or maybe something sweet to go with it like their famous chocolate croissants that just came out of the oven.\n[00:30 - 00:34] Speaker B: Are there a lot of people there? I kinda want to avoid a crowd.\n[00:34 - 00:39] Speaker A: It's pretty chill right now, not too crowded. So, how about that drink?\n[00:39 - 00:41] Speaker B: Alright, fine. I'll come for one drink."
6
+ },
7
+ {
8
+ "key": "SODA_PROCESSED--train--1180329",
9
+ "audio_url": "/root/autodl-tmp/output_overlapslong/newoverlapjson/overlap5s700/SODA_PROCESSED--train--1180329.wav",
10
+ "model_output": "[00:00 - 00:01] Speaker A: I'm sorry I failed the exam.\n[00:02 - 00:06] Speaker B: Ryann, what happened? You were doing so well in class.\n[00:06 - 00:15] Speaker A: I don't know. I just couldn't focus on the questions, even though I had studied them thoroughly and practiced similar problems multiple times before the exam.\n[00:09 - 00:15] Speaker B: Wait, when you say you couldn't focus, do you mean you were distracted by something specific, or was it just a general feeling?\n[00:16 - 00:20] Speaker A: It was just a general feeling. I couldn't focus on the test at all.\n[00:20 - 00:23] Speaker B: Did something happen that day that may have distracted you?\n[00:23 - 00:28] Speaker A: No, nothing happened. I just couldn't concentrate, no matter how hard I tried.\n[00:29 - 00:33] Speaker B: Sometimes stress or anxiety can sneak up on us. Do you think that might have been a factor?\n[00:34 - 00:39] Speaker A: Maybe, but I'm positive nothing specific happened. I just couldn't focus."
11
+ },
12
+ {
13
+ "key": "SODA_PROCESSED--train--366333",
14
+ "audio_url": "/root/autodl-tmp/output_overlapslong/newoverlapjson/overlap5s700/SODA_PROCESSED--train--366333.wav",
15
+ "model_output": "[00:00 - 00:15] Speaker A: Hi, I'm calling about the job opening. I saw the posting online and I'm very interested in applying for the position and learning more about the company's culture, values, and the specific challenges this role would be addressing in the current market landscape.\n[00:06 - 00:12] Speaker B: Sorry to jump in, but could you clarify which position you're referring to? We have several openings right now.\n[00:16 - 00:19] Speaker A: Oh, sure! I'm interested in the Marketing Manager position.\n[00:19 - 00:24] Speaker B: Great! We're looking for someone with your skills and experience. Tell me a little bit about yourself.\n[00:24 - 00:44] Speaker A: Well, I have a degree in marketing and I've been working in the field for about 5 years now. I've had a lot of success with the campaigns I've worked on and I think my skills would be a great fit for contributing to your company's growth and driving successful marketing strategies that align with your brand vision while adapting to emerging digital marketing trends.\n[00:45 - 00:51] Speaker B: That sounds impressive! Just to make sure, have you had experience managing a team, as this role will require leadership skills?\n[00:52 - 00:58] Speaker A: Yes, I've led a small team of 5 in my current role, and we've consistently met our targets.\n[00:58 - 01:04] Speaker B: Wow, it sounds like you would be perfect for this position! Are you available to come in for an interview tomorrow?\n[01:04 - 01:07] Speaker A: Absolutely! I'll be there at 10am.\n[01:07 - 01:08] Speaker B: Great, we'll see you then!"
16
+ },
17
+ {
18
+ "key": "SODA_PROCESSED--train--762905",
19
+ "audio_url": "/root/autodl-tmp/output_overlapslong/newoverlapjson/overlap5s700/SODA_PROCESSED--train--762905.wav",
20
+ "model_output": "[00:00 - 00:22] Speaker A: Sarah, I've been feeling really stuck lately. I'm not sure if I'm happy with my current situation. I'm considering a change of scenery, a new job, or even moving to a new city. What do you think about making such a big change at this point in my life? I'm particularly concerned about the financial implications and whether I'm emotionally ready for such a major transition.\n[00:12 - 00:22] Speaker B: Sorry to jump in, but when you say \"change of scenery,\" do you mean you're leaning more towards moving to a new city, or is it more about finding a new job where you are now?\n[00:23 - 00:35] Speaker A: That's a good question. Honestly, I'm not sure yet. It could be either, or maybe even both. I just feel like something needs to change. I'm not sure what exactly, but something just doesn't feel right.\n[00:35 - 00:52] Speaker B: Maybe it would help to sit down and make a list of pros and cons for each option? That way you can get all of your thoughts out on paper and see things more clearly. Plus, it might help you to talk through your options with someone else too. So if you want to bounce any ideas off me, I'm here for any brainstorming or just to listen if you need to vent.\n[00:53 - 01:02] Speaker A: Actually, that sounds perfect. I think talking it out with you could really help me figure things out. Thank you, Sarah. I really appreciate your support.\n[01:02 - 01:15] Speaker B: Absolutely! I'm always here for you, Veronika. Let me know when you want to schedule a proper brainstorming session where we can go through all your options in detail and maybe even research some potential cities or job opportunities together."
21
+ },
22
+ {
23
+ "key": "SODA_PROCESSED--train--836452",
24
+ "audio_url": "/root/autodl-tmp/output_overlapslong/newoverlapjson/overlap5s700/SODA_PROCESSED--train--836452.wav",
25
+ "model_output": "[00:00 - 00:01] Speaker A: So, what's the verdict?\n[00:01 - 00:20] Speaker B: Well, overall, your health is good. Your blood work looks great and your physical exam was normal. However, we did find something abnormal on your mammogram. There's a mass in your left breast that we're going to need to investigate further with additional tests including an ultrasound and possibly an MRI to get a clearer picture of what we're dealing with.\n[00:13 - 00:14] Speaker A: What does that mean?\n[00:20 - 00:25] Speaker B: It could be benign or it could be cancerous. We'll need to do a biopsy to know for sure.\n[00:25 - 00:33] Speaker A: Before we discuss the next step, I just want to express how anxious I am about this. How soon can we get the biopsy done?\n[00:34 - 00:45] Speaker B: We'll schedule you for a biopsy as soon as possible. After we get the results back, we'll know more about what treatment options are available and develop a comprehensive care plan tailored to your specific needs."
26
+ }
27
+ ]
ms-swift/silence_overlaps/700/test/silence_isoverlaps_test.json ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--484021.wav",
4
+ "key": "SODA_PROCESSED--train--484021",
5
+ "model_output": "No significant overlaps found."
6
+ },
7
+ {
8
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--934782.wav",
9
+ "key": "SODA_PROCESSED--train--934782",
10
+ "model_output": "No significant overlaps found."
11
+ },
12
+ {
13
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--1106201.wav",
14
+ "key": "SODA_PROCESSED--train--1106201",
15
+ "model_output": "No significant overlaps found."
16
+ },
17
+ {
18
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--1007774.wav",
19
+ "key": "SODA_PROCESSED--train--1007774",
20
+ "model_output": "No significant overlaps found."
21
+ },
22
+ {
23
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--708343.wav",
24
+ "key": "SODA_PROCESSED--train--708343",
25
+ "model_output": "No significant overlaps found."
26
+ }
27
+ ]
ms-swift/silence_overlaps/700/test/silence_issilence_test.json ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--84371.wav",
4
+ "key": "SODA_PROCESSED--train--84371",
5
+ "model_output": "Yes, There is a silence gap, multiple speakers were silent from 00:32 to 00:37."
6
+ },
7
+ {
8
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--185821.wav",
9
+ "key": "SODA_PROCESSED--train--185821",
10
+ "model_output": "Yes, There is a silence gap, multiple speakers were silent from 00:16 to 00:22."
11
+ },
12
+ {
13
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--908191.wav",
14
+ "key": "SODA_PROCESSED--train--908191",
15
+ "model_output": "Yes, There is a silence gap, multiple speakers were silent from 00:09 to 00:14."
16
+ },
17
+ {
18
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--203528.wav",
19
+ "key": "SODA_PROCESSED--train--203528",
20
+ "model_output": "Yes, There is a silence gap, multiple speakers were silent from 00:20 to 00:25."
21
+ },
22
+ {
23
+ "audio_url": "/root/autodl-tmp/output_silence45/json/silence/SODA_PROCESSED--train--1168213.wav",
24
+ "key": "SODA_PROCESSED--train--1168213",
25
+ "model_output": "Yes, There is a silence gap, multiple speakers were silent from 00:08 to 00:13."
26
+ }
27
+ ]
ms-swift/silence_overlaps/700/train/.ipynb_checkpoints/silence_speaker_segments_train-checkpoint.json ADDED
The diff for this file is too large to render. See raw diff
 
ms-swift/silence_overlaps/700/train/silence_transcriptions_train.json ADDED
The diff for this file is too large to render. See raw diff
 
ms-swift/silence_overlaps/delete_transcript2.json ADDED
@@ -0,0 +1 @@
 
 
1
+ []
ms-swift/silence_overlaps/only_overlap/overlap5s_isoverlap_train.json ADDED
The diff for this file is too large to render. See raw diff
 
ms-swift/silence_overlaps/overlap5s_issilence.json ADDED
The diff for this file is too large to render. See raw diff
 
ms-swift/silence_overlaps/silence_transcriptions.json ADDED
The diff for this file is too large to render. See raw diff
 
ms-swift/silence_overlaps/test/.ipynb_checkpoints/test-checkpoint.json ADDED
@@ -0,0 +1,566 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "SODA_PROCESSED--train--790538": {
3
+ "original_text": "A: Hey Val, are you all packed for our camping trip?\nB: Yeah, I think so. I have the tents, food, and clothes. Did you remember to pack your bag?\nA: Yes, I did. I have everything we need. Are you sure you're ready for this? It's going to be a lot of [interrupt] fun, but also quite challenging with all the hiking and outdoor activities we have planned, especially since neither of us has much wilderness experience.\nB: Actually, I just realized I might have forgotten the matches. Do you have any in your bag?\nA: Oh, I didn't check for matches. I'll make sure to pack them. So, you were saying you were looking forward to it?\nB: Yeah, I'm ready. I've been looking forward to it for a while. Thanks for planning it all out.\nA: No problem. I know we both needed a break from work and life in general. It'll be nice to relax in nature for a few days and just unwind.\nB: Speaking of relaxing, did we decide on any specific activities to do while we're there?\nA: I was thinking we could go hiking and maybe do some fishing. But I haven't mentioned the exact schedule yet.\nB: Definitely. So, what time are we leaving tomorrow morning?\nA: Around 9am should be good. That will give us enough time to get there and set up camp before it gets dark.\nB: Sounds perfect. See you then!",
4
+ "cleaned_text": "A: Hey Val, are you all packed for our camping trip?\nB: Yeah, I think so. I have the tents, food, and clothes. Did you remember to pack your bag?\nA:Yes, I did. I have everything we need. Are you sure you're ready for this? It's going to be a lot of fun, but also quite challenging with all the hiking and outdoor activities we have planned, especially since neither of us has much wilderness experience.\nB: Actually, I just realized I might have forgotten the matches. Do you have any in your bag?\nA: Oh, I didn't check for matches. I'll make sure to pack them. So, you were saying you were looking forward to it?\nB: Yeah, I'm ready. I've been looking forward to it for a while. Thanks for planning it all out.\nA: No problem. I know we both needed a break from work and life in general. It'll be nice to relax in nature for a few days and just unwind.\nB: Speaking of relaxing, did we decide on any specific activities to do while we're there?\nA: I was thinking we could go hiking and maybe do some fishing. But I haven't mentioned the exact schedule yet.\nB: Definitely. So, what time are we leaving tomorrow morning?\nA: Around 9am should be good. That will give us enough time to get there and set up camp before it gets dark.\nB: Sounds perfect. See you then!",
5
+ "total_duration": 70.17238095238095,
6
+ "stereo_audio": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--790538/stereo_dialogue.wav",
7
+ "speaker_tracks": {
8
+ "A": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--790538/A_track.wav",
9
+ "B": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--790538/B_track.wav"
10
+ },
11
+ "error_type": "error_after_interrupt",
12
+ "segments": [
13
+ {
14
+ "speaker": "A",
15
+ "text": "Hey Val, are you all packed for our camping trip?",
16
+ "original_text": "Hey Val, are you all packed for our camping trip?",
17
+ "start_time": 0,
18
+ "end_time": 3.355283446712018,
19
+ "audio_file": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--790538/temp/line_0_A.wav",
20
+ "silence_duration": 0,
21
+ "is_interrupted": false
22
+ },
23
+ {
24
+ "speaker": "B",
25
+ "text": "Yeah, I think so. I have the tents, food, and clothes. Did you remember to pack your bag?",
26
+ "original_text": "Yeah, I think so. I have the tents, food, and clothes. Did you remember to pack your bag?",
27
+ "start_time": 3.7205840462203787,
28
+ "end_time": 8.2368652253587,
29
+ "audio_file": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--790538/temp/line_1_B.wav",
30
+ "silence_duration": 0.3653005995083607,
31
+ "is_interrupted": false
32
+ },
33
+ {
34
+ "speaker": "A",
35
+ "text": "Yes, I did. I have everything we need. Are you sure you're ready for this? It's going to be a lot of",
36
+ "original_text": "Yes, I did. I have everything we need. Are you sure you're ready for this? It's going to be a lot of [interrupt] fun, but also quite challenging with all the hiking and outdoor activities we have planned, especially since neither of us has much wilderness experience.",
37
+ "start_time": 8.610541776107988,
38
+ "end_time": 23.808002093568305,
39
+ "audio_file": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--790538/temp/line_2_A.wav",
40
+ "silence_duration": 0.37367655074928696,
41
+ "is_interrupted": true,
42
+ "text_after_interrupt": "fun, but also quite challenging with all the hiking and outdoor activities we have planned, especially since neither of us has much wilderness experience."
43
+ },
44
+ {
45
+ "speaker": "B",
46
+ "text": "Actually, I just realized I might have forgotten the matches. Do you have any in your bag?",
47
+ "original_text": "Actually, I just realized I might have forgotten the matches. Do you have any in your bag?",
48
+ "start_time": 15.588138147990074,
49
+ "end_time": 19.686460143454926,
50
+ "audio_file": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--790538/temp/line_3_B.wav",
51
+ "silence_duration": 0.407293268908306,
52
+ "is_interrupted": false
53
+ },
54
+ {
55
+ "speaker": "A",
56
+ "text": "Oh, I didn't check for matches. I'll make sure to pack them. So, you were saying you were looking forward to it?",
57
+ "original_text": "Oh, I didn't check for matches. I'll make sure to pack them. So, you were saying you were looking forward to it?",
58
+ "start_time": 24.198980395182797,
59
+ "end_time": 30.189728694502527,
60
+ "audio_file": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--790538/temp/line_4_A.wav",
61
+ "silence_duration": 0.39097830161449093,
62
+ "is_interrupted": false
63
+ },
64
+ {
65
+ "speaker": "B",
66
+ "text": "Yeah, I'm ready. I've been looking forward to it for a while. Thanks for planning it all out.",
67
+ "original_text": "Yeah, I'm ready. I've been looking forward to it for a while. Thanks for planning it all out.",
68
+ "start_time": 30.59422649262823,
69
+ "end_time": 35.60973669670986,
70
+ "audio_file": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--790538/temp/line_5_B.wav",
71
+ "silence_duration": 0.4044977981257025,
72
+ "is_interrupted": false
73
+ },
74
+ {
75
+ "speaker": "A",
76
+ "text": "No problem. I know we both needed a break from work and life in general. It'll be nice to relax in nature for a few days and just unwind.",
77
+ "original_text": "No problem. I know we both needed a break from work and life in general. It'll be nice to relax in nature for a few days and just unwind.",
78
+ "start_time": 36.08564285355298,
79
+ "end_time": 45.52455441817884,
80
+ "audio_file": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--790538/temp/line_6_A.wav",
81
+ "silence_duration": 0.47590615684312176,
82
+ "is_interrupted": false
83
+ },
84
+ {
85
+ "speaker": "B",
86
+ "text": "Speaking of relaxing, did we decide on any specific activities to do while we're there?",
87
+ "original_text": "Speaking of relaxing, did we decide on any specific activities to do while we're there?",
88
+ "start_time": 45.86997259942616,
89
+ "end_time": 50.33981386926743,
90
+ "audio_file": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--790538/temp/line_7_B.wav",
91
+ "silence_duration": 0.34541818124732176,
92
+ "is_interrupted": false
93
+ },
94
+ {
95
+ "speaker": "A",
96
+ "text": "I was thinking we could go hiking and maybe do some fishing. But I haven't mentioned the exact schedule yet.",
97
+ "original_text": "I was thinking we could go hiking and maybe do some fishing. But I haven't mentioned the exact schedule yet.",
98
+ "start_time": 50.65173673057719,
99
+ "end_time": 56.66570498454544,
100
+ "audio_file": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--790538/temp/line_8_A.wav",
101
+ "silence_duration": 0.3119228613097595,
102
+ "is_interrupted": false
103
+ },
104
+ {
105
+ "speaker": "B",
106
+ "text": "Definitely. So, what time are we leaving tomorrow morning?",
107
+ "original_text": "Definitely. So, what time are we leaving tomorrow morning?",
108
+ "start_time": 57.18372257432499,
109
+ "end_time": 60.84086543146785,
110
+ "audio_file": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--790538/temp/line_9_B.wav",
111
+ "silence_duration": 0.5180175897795466,
112
+ "is_interrupted": false
113
+ },
114
+ {
115
+ "speaker": "A",
116
+ "text": "Around 9am should be good. That will give us enough time to get there and set up camp before it gets dark.",
117
+ "original_text": "Around 9am should be good. That will give us enough time to get there and set up camp before it gets dark.",
118
+ "start_time": 61.21524274027814,
119
+ "end_time": 67.84453979243234,
120
+ "audio_file": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--790538/temp/line_10_A.wav",
121
+ "silence_duration": 0.37437730881029474,
122
+ "is_interrupted": false
123
+ },
124
+ {
125
+ "speaker": "B",
126
+ "text": "Sounds perfect. See you then!",
127
+ "original_text": "Sounds perfect. See you then!",
128
+ "start_time": 68.44252417070729,
129
+ "end_time": 70.17241079202248,
130
+ "audio_file": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--790538/temp/line_11_B.wav",
131
+ "silence_duration": 0.5979843782749388,
132
+ "is_interrupted": false
133
+ }
134
+ ]
135
+ },
136
+ "SODA_PROCESSED--train--123906": {
137
+ "original_text": "A: You know, it's really annoying when things are loose and [interrupt] they just don't stay in place like they're supposed to, especially when you're in the middle of an important task and everything keeps falling apart because of poor craftsmanship.\n\nB: Oh, absolutely! Like when you try to hang something, and it just won't stay in place. Why can't they just be tight and sturdy?\n\nA: Exactly! It drives me crazy when I see a loose screw or a crooked picture frame.\n\nB: Yeah, it's like they're just begging to be fixed. But you know, sometimes I wonder if people even notice these things or if it's just us.\n\nA: Right? And it's not like it's that hard to fix them, but people just don't bother. Or they don't have the time, or they don't know how. But it just drives me up the wall when things are out of place or not functioning properly.\n\nB: I totally get you. There's nothing worse than trying to use something and it's just not working right because someone didn't take the time to fix it properly. Speaking of which, do you think it's just laziness, or maybe they don't have the right tools?\n\nA: It could be either, honestly. But what's the point of having things if they're just going to be broken and dysfunctional?\n\nB: Yeah, it's really frustrating. Especially when it's something you need to use regularly.\n\nA: I know, I feel like I could go on about this forever. But I should probably get back to tightening this screw before it drives me even crazier.\n\nB: Yeah, you probably should. Thanks for fixing that, by the way.\n\nA: No problem. Just doing my part to keep things in working order around here.",
138
+ "cleaned_text": "A:You know, it's really annoying when things are loose and they just don't stay in place like they're supposed to, especially when you're in the middle of an important task and everything keeps falling apart because of poor craftsmanship.\n\nB: Oh, absolutely! Like when you try to hang something, and it just won't stay in place. Why can't they just be tight and sturdy?\n\nA: Exactly! It drives me crazy when I see a loose screw or a crooked picture frame.\n\nB: Yeah, it's like they're just begging to be fixed. But you know, sometimes I wonder if people even notice these things or if it's just us.\n\nA: Right? And it's not like it's that hard to fix them, but people just don't bother. Or they don't have the time, or they don't know how. But it just drives me up the wall when things are out of place or not functioning properly.\n\nB: I totally get you. There's nothing worse than trying to use something and it's just not working right because someone didn't take the time to fix it properly. Speaking of which, do you think it's just laziness, or maybe they don't have the right tools?\n\nA: It could be either, honestly. But what's the point of having things if they're just going to be broken and dysfunctional?\n\nB: Yeah, it's really frustrating. Especially when it's something you need to use regularly.\n\nA: I know, I feel like I could go on about this forever. But I should probably get back to tightening this screw before it drives me even crazier.\n\nB: Yeah, you probably should. Thanks for fixing that, by the way.\n\nA: No problem. Just doing my part to keep things in working order around here.",
139
+ "total_duration": 81.61818594104308,
140
+ "stereo_audio": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--123906/stereo_dialogue.wav",
141
+ "speaker_tracks": {
142
+ "A": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--123906/A_track.wav",
143
+ "B": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--123906/B_track.wav"
144
+ },
145
+ "error_type": "error_after_interrupt",
146
+ "segments": [
147
+ {
148
+ "speaker": "A",
149
+ "text": "You know, it's really annoying when things are loose and",
150
+ "original_text": "You know, it's really annoying when things are loose and [interrupt] they just don't stay in place like they're supposed to, especially when you're in the middle of an important task and everything keeps falling apart because of poor craftsmanship.",
151
+ "start_time": 0,
152
+ "end_time": 13.293424036281179,
153
+ "audio_file": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--123906/temp/line_0_A.wav",
154
+ "silence_duration": 0,
155
+ "is_interrupted": true,
156
+ "text_after_interrupt": "they just don't stay in place like they're supposed to, especially when you're in the middle of an important task and everything keeps falling apart because of poor craftsmanship."
157
+ },
158
+ {
159
+ "speaker": "B",
160
+ "text": "Oh, absolutely! Like when you try to hang something, and it just won't stay in place. Why can't they just be tight and sturdy?",
161
+ "original_text": "Oh, absolutely! Like when you try to hang something, and it just won't stay in place. Why can't they just be tight and sturdy?",
162
+ "start_time": 3.250793650793651,
163
+ "end_time": 9.961360544217687,
164
+ "audio_file": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--123906/temp/line_1_B.wav",
165
+ "silence_duration": 0.5919206270367532,
166
+ "is_interrupted": false
167
+ },
168
+ {
169
+ "speaker": "A",
170
+ "text": "Exactly! It drives me crazy when I see a loose screw or a crooked picture frame.",
171
+ "original_text": "Exactly! It drives me crazy when I see a loose screw or a crooked picture frame.",
172
+ "start_time": 13.747669310225408,
173
+ "end_time": 19.123088811359196,
174
+ "audio_file": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--123906/temp/line_2_A.wav",
175
+ "silence_duration": 0.4542452739442295,
176
+ "is_interrupted": false
177
+ },
178
+ {
179
+ "speaker": "B",
180
+ "text": "Yeah, it's like they're just begging to be fixed. But you know, sometimes I wonder if people even notice these things or if it's just us.",
181
+ "original_text": "Yeah, it's like they're just begging to be fixed. But you know, sometimes I wonder if people even notice these things or if it's just us.",
182
+ "start_time": 19.55223911029212,
183
+ "end_time": 26.39051575428305,
184
+ "audio_file": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--123906/temp/line_3_B.wav",
185
+ "silence_duration": 0.4291502989329248,
186
+ "is_interrupted": false
187
+ },
188
+ {
189
+ "speaker": "A",
190
+ "text": "Right? And it's not like it's that hard to fix them, but people just don't bother. Or they don't have the time, or they don't know how. But it just drives me up the wall when things are out of place or not functioning properly.",
191
+ "original_text": "Right? And it's not like it's that hard to fix them, but people just don't bother. Or they don't have the time, or they don't know how. But it just drives me up the wall when things are out of place or not functioning properly.",
192
+ "start_time": 26.849330263466445,
193
+ "end_time": 39.388105773670524,
194
+ "audio_file": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--123906/temp/line_4_A.wav",
195
+ "silence_duration": 0.45881450918339584,
196
+ "is_interrupted": false
197
+ },
198
+ {
199
+ "speaker": "B",
200
+ "text": "I totally get you. There's nothing worse than trying to use something and it's just not working right because someone didn't take the time to fix it properly. Speaking of which, do you think it's just laziness, or maybe they don't have the right tools?",
201
+ "original_text": "I totally get you. There's nothing worse than trying to use something and it's just not working right because someone didn't take the time to fix it properly. Speaking of which, do you think it's just laziness, or maybe they don't have the right tools?",
202
+ "start_time": 39.82405615117125,
203
+ "end_time": 51.73589288586513,
204
+ "audio_file": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--123906/temp/line_5_B.wav",
205
+ "silence_duration": 0.4359503775007275,
206
+ "is_interrupted": false
207
+ },
208
+ {
209
+ "speaker": "A",
210
+ "text": "It could be either, honestly. But what's the point of having things if they're just going to be broken and dysfunctional?",
211
+ "original_text": "It could be either, honestly. But what's the point of having things if they're just going to be broken and dysfunctional?",
212
+ "start_time": 52.27556460896347,
213
+ "end_time": 58.811981842523565,
214
+ "audio_file": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--123906/temp/line_6_A.wav",
215
+ "silence_duration": 0.5396717230983441,
216
+ "is_interrupted": false
217
+ },
218
+ {
219
+ "speaker": "B",
220
+ "text": "Yeah, it's really frustrating. Especially when it's something you need to use regularly.",
221
+ "original_text": "Yeah, it's really frustrating. Especially when it's something you need to use regularly.",
222
+ "start_time": 59.3890926661041,
223
+ "end_time": 63.90537384524242,
224
+ "audio_file": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--123906/temp/line_7_B.wav",
225
+ "silence_duration": 0.5771108235805331,
226
+ "is_interrupted": false
227
+ },
228
+ {
229
+ "speaker": "A",
230
+ "text": "I know, I feel like I could go on about this forever. But I should probably get back to tightening this screw before it drives me even crazier.",
231
+ "original_text": "I know, I feel like I could go on about this forever. But I should probably get back to tightening this screw before it drives me even crazier.",
232
+ "start_time": 64.32218282146329,
233
+ "end_time": 72.3911170618261,
234
+ "audio_file": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--123906/temp/line_8_A.wav",
235
+ "silence_duration": 0.4168089762208619,
236
+ "is_interrupted": false
237
+ },
238
+ {
239
+ "speaker": "B",
240
+ "text": "Yeah, you probably should. Thanks for fixing that, by the way.",
241
+ "original_text": "Yeah, you probably should. Thanks for fixing that, by the way.",
242
+ "start_time": 72.7979076035892,
243
+ "end_time": 76.45505046073205,
244
+ "audio_file": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--123906/temp/line_9_B.wav",
245
+ "silence_duration": 0.4067905417630988,
246
+ "is_interrupted": false
247
+ },
248
+ {
249
+ "speaker": "A",
250
+ "text": "No problem. Just doing my part to keep things in working order around here.",
251
+ "original_text": "No problem. Just doing my part to keep things in working order around here.",
252
+ "start_time": 76.90457674986,
253
+ "end_time": 81.6182275435108,
254
+ "audio_file": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--123906/temp/line_10_A.wav",
255
+ "silence_duration": 0.4495262891279488,
256
+ "is_interrupted": false
257
+ }
258
+ ]
259
+ },
260
+ "SODA_PROCESSED--train--1112763": {
261
+ "original_dialog_id": "",
262
+ "dialog_index": 1112763,
263
+ "processed_dialogue": "A: Hey Sarah, what's up? \nB: Not much, what are you up to? \nA: Just standing in this pool of water, trying to escape the heat. It's really hot out [interrupt] and I was thinking about just staying in here for the rest of the day because the water is perfectly refreshing and the shade from these trees makes it even more comfortable. \nB: Sorry to cut in, but how hot is it exactly? I was thinking of going for a run later, but now I'm not so sure. \nA: Yeah, it's probably not the best idea to run today. It's around 95 degrees, and it feels even hotter with the humidity. The pool feels really good and it's helping me cool down especially since there's a slight breeze too that makes the whole experience feel like a mini vacation from this brutal summer weather. \nB: That sounds nice. I wish I could be in a pool right now too. \nA: Yeah, it's pretty great. I feel like I could stay here all day without getting bored because there's so much to enjoy - watching the clouds drift by, listening to the birds chirping, and feeling the gentle movement of the water around me. \nB: I bet. It must be really nice to just relax in the water and not have to worry about anything else. \nA: Yeah, it is. It's really peaceful and calming. I'm glad I decided to come out here and do this instead of staying indoors where I would just be sweating and miserable, staring at the same four walls all afternoon. \nB: Me too, Cleveland. Me too.",
264
+ "clean_dialogue": "A: Hey Sarah, what's up? \nB: Not much, what are you up to? \nA:Just standing in this pool of water, trying to escape the heat. It's really hot out and I was thinking about just staying in here for the rest of the day because the water is perfectly refreshing and the shade from these trees makes it even more comfortable.\nB: Sorry to cut in, but how hot is it exactly? I was thinking of going for a run later, but now I'm not so sure. \nA: Yeah, it's probably not the best idea to run today. It's around 95 degrees, and it feels even hotter with the humidity. The pool feels really good and it's helping me cool down especially since there's a slight breeze too that makes the whole experience feel like a mini vacation from this brutal summer weather. \nB: That sounds nice. I wish I could be in a pool right now too. \nA: Yeah, it's pretty great. I feel like I could stay here all day without getting bored because there's so much to enjoy - watching the clouds drift by, listening to the birds chirping, and feeling the gentle movement of the water around me. \nB: I bet. It must be really nice to just relax in the water and not have to worry about anything else. \nA: Yeah, it is. It's really peaceful and calming. I'm glad I decided to come out here and do this instead of staying indoors where I would just be sweating and miserable, staring at the same four walls all afternoon. \nB: Me too, Cleveland. Me too.",
265
+ "speaker_tracks": {
266
+ "A": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--1112763/A_track.wav",
267
+ "B": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--1112763/B_track.wav"
268
+ },
269
+ "error_type": "error_after_interrupt",
270
+ "stereo_audio": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--1112763/stereo_dialogue.wav",
271
+ "total_duration": 80.37741496598639,
272
+ "segments": [
273
+ {
274
+ "speaker": "A",
275
+ "text": "Hey Sarah, what's up?",
276
+ "original_text": "Hey Sarah, what's up?",
277
+ "start_time": 0,
278
+ "end_time": 1.6486167800453515,
279
+ "audio_file": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--1112763/temp/line_0_A.wav",
280
+ "silence_duration": 0,
281
+ "is_interrupted": false
282
+ },
283
+ {
284
+ "speaker": "B",
285
+ "text": "Not much, what are you up to?",
286
+ "original_text": "Not much, what are you up to?",
287
+ "start_time": 2.244027328872719,
288
+ "end_time": 3.497904879893127,
289
+ "audio_file": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--1112763/temp/line_1_B.wav",
290
+ "silence_duration": 0.5954105488273673,
291
+ "is_interrupted": false
292
+ },
293
+ {
294
+ "speaker": "A",
295
+ "text": "Just standing in this pool of water, trying to escape the heat. It's really hot out",
296
+ "original_text": "Just standing in this pool of water, trying to escape the heat. It's really hot out [interrupt] and I was thinking about just staying in here for the rest of the day because the water is perfectly refreshing and the shade from these trees makes it even more comfortable.",
297
+ "start_time": 3.9748179407694604,
298
+ "end_time": 18.847198893150413,
299
+ "audio_file": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--1112763/temp/line_2_A.wav",
300
+ "silence_duration": 0.47691306087633345,
301
+ "is_interrupted": true,
302
+ "text_after_interrupt": "and I was thinking about just staying in here for the rest of the day because the water is perfectly refreshing and the shade from these trees makes it even more comfortable."
303
+ },
304
+ {
305
+ "speaker": "B",
306
+ "text": "Sorry to cut in, but how hot is it exactly? I was thinking of going for a run later, but now I'm not so sure.",
307
+ "original_text": "Sorry to cut in, but how hot is it exactly? I was thinking of going for a run later, but now I'm not so sure.",
308
+ "start_time": 9.09481794076946,
309
+ "end_time": 15.352595718547239,
310
+ "audio_file": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--1112763/temp/line_3_B.wav",
311
+ "silence_duration": 0.5962546040921268,
312
+ "is_interrupted": false
313
+ },
314
+ {
315
+ "speaker": "A",
316
+ "text": "Yeah, it's probably not the best idea to run today. It's around 95 degrees, and it feels even hotter with the humidity. The pool feels really good and it's helping me cool down especially since there's a slight breeze too that makes the whole experience feel like a mini vacation from this brutal summer weather.",
317
+ "original_text": "Yeah, it's probably not the best idea to run today. It's around 95 degrees, and it feels even hotter with the humidity. The pool feels really good and it's helping me cool down especially since there's a slight breeze too that makes the whole experience feel like a mini vacation from this brutal summer weather.",
318
+ "start_time": 19.411937689342565,
319
+ "end_time": 39.3462687551022,
320
+ "audio_file": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--1112763/temp/line_4_A.wav",
321
+ "silence_duration": 0.5647387961921536,
322
+ "is_interrupted": false
323
+ },
324
+ {
325
+ "speaker": "B",
326
+ "text": "That sounds nice. I wish I could be in a pool right now too.",
327
+ "original_text": "That sounds nice. I wish I could be in a pool right now too.",
328
+ "start_time": 39.7352726618639,
329
+ "end_time": 42.986066312657556,
330
+ "audio_file": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--1112763/temp/line_5_B.wav",
331
+ "silence_duration": 0.3890039067616998,
332
+ "is_interrupted": false
333
+ },
334
+ {
335
+ "speaker": "A",
336
+ "text": "Yeah, it's pretty great. I feel like I could stay here all day without getting bored because there's so much to enjoy - watching the clouds drift by, listening to the birds chirping, and feeling the gentle movement of the water around me.",
337
+ "original_text": "Yeah, it's pretty great. I feel like I could stay here all day without getting bored because there's so much to enjoy - watching the clouds drift by, listening to the birds chirping, and feeling the gentle movement of the water around me.",
338
+ "start_time": 43.57081570786076,
339
+ "end_time": 58.803105957293866,
340
+ "audio_file": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--1112763/temp/line_6_A.wav",
341
+ "silence_duration": 0.5847493952032004,
342
+ "is_interrupted": false
343
+ },
344
+ {
345
+ "speaker": "B",
346
+ "text": "I bet. It must be really nice to just relax in the water and not have to worry about anything else.",
347
+ "original_text": "I bet. It must be really nice to just relax in the water and not have to worry about anything else.",
348
+ "start_time": 59.111149637610815,
349
+ "end_time": 63.94090020450424,
350
+ "audio_file": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--1112763/temp/line_7_B.wav",
351
+ "silence_duration": 0.3080436803169506,
352
+ "is_interrupted": false
353
+ },
354
+ {
355
+ "speaker": "A",
356
+ "text": "Yeah, it is. It's really peaceful and calming. I'm glad I decided to come out here and do this instead of staying indoors where I would just be sweating and miserable, staring at the same four walls all afternoon.",
357
+ "original_text": "Yeah, it is. It's really peaceful and calming. I'm glad I decided to come out here and do this instead of staying indoors where I would just be sweating and miserable, staring at the same four walls all afternoon.",
358
+ "start_time": 64.4324265657298,
359
+ "end_time": 78.10897985371166,
360
+ "audio_file": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--1112763/temp/line_8_A.wav",
361
+ "silence_duration": 0.4915263612255627,
362
+ "is_interrupted": false
363
+ },
364
+ {
365
+ "speaker": "B",
366
+ "text": "Me too, Cleveland. Me too.",
367
+ "original_text": "Me too, Cleveland. Me too.",
368
+ "start_time": 78.5430769468726,
369
+ "end_time": 80.37745336410616,
370
+ "audio_file": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--1112763/temp/line_9_B.wav",
371
+ "silence_duration": 0.4340970931609471,
372
+ "is_interrupted": false
373
+ }
374
+ ]
375
+ },
376
+ "SODA_PROCESSED--train--822773": {
377
+ "original_text": "A: Hey, Sarah! I'm so glad to see you!\nB: Hey, Renesmee! I'm happy to see you too! How are you doing today?\nA: I'm doing great today! I love exploring and playing games. Do you want to play a game of fetch with [interrupt] me in the park later? We could bring some tennis balls and maybe even invite some other friends to join us for an afternoon of fun activities.\nB: Oh, fetch sounds fun! But I was wondering, do you have any other favorite games we could try later?\nA: That sounds like fun! I would love to play a game of fetch with you. Maybe after that, we can try some other games too. Alright, let's go!\nB: Absolutely, let's start with fetch and then we can see what else we feel like doing!\nA: Renesmee and Sarah seem to be having a great time playing fetch together. Sarah is laughing and Renesmee looks like he's having a blast running around.",
378
+ "cleaned_text": "A: Hey, Sarah! I'm so glad to see you!\nB: Hey, Renesmee! I'm happy to see you too! How are you doing today?\nA:I'm doing great today! I love exploring and playing games. Do you want to play a game of fetch with me in the park later? We could bring some tennis balls and maybe even invite some other friends to join us for an afternoon of fun activities.\nB: Oh, fetch sounds fun! But I was wondering, do you have any other favorite games we could try later?\nA: That sounds like fun! I would love to play a game of fetch with you. Maybe after that, we can try some other games too. Alright, let's go!\nB: Absolutely, let's start with fetch and then we can see what else we feel like doing!\nA: Renesmee and Sarah seem to be having a great time playing fetch together. Sarah is laughing and Renesmee looks like he's having a blast running around.",
379
+ "total_duration": 45.90185941043084,
380
+ "stereo_audio": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--822773/stereo_dialogue.wav",
381
+ "speaker_tracks": {
382
+ "A": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--822773/A_track.wav",
383
+ "B": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--822773/B_track.wav"
384
+ },
385
+ "error_type": "error_after_interrupt",
386
+ "segments": [
387
+ {
388
+ "speaker": "A",
389
+ "text": "Hey, Sarah! I'm so glad to see you!",
390
+ "original_text": "Hey, Sarah! I'm so glad to see you!",
391
+ "start_time": 0,
392
+ "end_time": 2.7747845804988662,
393
+ "audio_file": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--822773/temp/line_0_A.wav",
394
+ "silence_duration": 0,
395
+ "is_interrupted": false
396
+ },
397
+ {
398
+ "speaker": "B",
399
+ "text": "Hey, Renesmee! I'm happy to see you too! How are you doing today?",
400
+ "original_text": "Hey, Renesmee! I'm happy to see you too! How are you doing today?",
401
+ "start_time": 3.1698846088737547,
402
+ "end_time": 7.268206604338607,
403
+ "audio_file": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--822773/temp/line_1_B.wav",
404
+ "silence_duration": 0.39510002837488867,
405
+ "is_interrupted": false
406
+ },
407
+ {
408
+ "speaker": "A",
409
+ "text": "I'm doing great today! I love exploring and playing games. Do you want to play a game of fetch with",
410
+ "original_text": "I'm doing great today! I love exploring and playing games. Do you want to play a game of fetch with [interrupt] me in the park later? We could bring some tennis balls and maybe even invite some other friends to join us for an afternoon of fun activities.",
411
+ "start_time": 7.859956363848243,
412
+ "end_time": 22.209888336637356,
413
+ "audio_file": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--822773/temp/line_2_A.wav",
414
+ "silence_duration": 0.5917497595096354,
415
+ "is_interrupted": true,
416
+ "text_after_interrupt": "me in the park later? We could bring some tennis balls and maybe even invite some other friends to join us for an afternoon of fun activities."
417
+ },
418
+ {
419
+ "speaker": "B",
420
+ "text": "Oh, fetch sounds fun! But I was wondering, do you have any other favorite games we could try later?",
421
+ "original_text": "Oh, fetch sounds fun! But I was wondering, do you have any other favorite games we could try later?",
422
+ "start_time": 14.558913279948015,
423
+ "end_time": 19.574423484029648,
424
+ "audio_file": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--822773/temp/line_3_B.wav",
425
+ "silence_duration": 0.35472772157833465,
426
+ "is_interrupted": false
427
+ },
428
+ {
429
+ "speaker": "A",
430
+ "text": "That sounds like fun! I would love to play a game of fetch with you. Maybe after that, we can try some other games too. Alright, let's go!",
431
+ "original_text": "That sounds like fun! I would love to play a game of fetch with you. Maybe after that, we can try some other games too. Alright, let's go!",
432
+ "start_time": 22.580509090185526,
433
+ "end_time": 31.798831085650377,
434
+ "audio_file": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--822773/temp/line_4_A.wav",
435
+ "silence_duration": 0.3706207535481718,
436
+ "is_interrupted": false
437
+ },
438
+ {
439
+ "speaker": "B",
440
+ "text": "Absolutely, let's start with fetch and then we can see what else we feel like doing!",
441
+ "original_text": "Absolutely, let's start with fetch and then we can see what else we feel like doing!",
442
+ "start_time": 32.37886103861385,
443
+ "end_time": 36.53523292070002,
444
+ "audio_file": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--822773/temp/line_5_B.wav",
445
+ "silence_duration": 0.5800299529634694,
446
+ "is_interrupted": false
447
+ },
448
+ {
449
+ "speaker": "A",
450
+ "text": "Renesmee and Sarah seem to be having a great time playing fetch together. Sarah is laughing and Renesmee looks like he's having a blast running around.",
451
+ "original_text": "Renesmee and Sarah seem to be having a great time playing fetch together. Sarah is laughing and Renesmee looks like he's having a blast running around.",
452
+ "start_time": 36.96218842583218,
453
+ "end_time": 45.90187096551472,
454
+ "audio_file": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--822773/temp/line_6_A.wav",
455
+ "silence_duration": 0.4269555051321636,
456
+ "is_interrupted": false
457
+ }
458
+ ]
459
+ },
460
+ "SODA_PROCESSED--train--424960": {
461
+ "original_dialog_id": "",
462
+ "dialog_index": 424960,
463
+ "processed_dialogue": "A: So, you've traveled a lot? \nB: Yes, I have. I've been to all sorts of interesting places, from tropical islands to snowy mountain ranges. \nA: Sorry to interrupt, but when you say \"all sorts of places,\" do you mean different countries or also different kinds of environments, like cities versus rural areas? \nB: Both, actually. I've visited various countries and experienced everything from bustling cities to remote villages. It's definitely a great experience. You get to see so many different cultures and meet new [interrupt] people, each with their own unique stories and perspectives that can completely change how you view the world and your place in it. \nA: Speaking of cultures, which one left the biggest impression on you? \nB: That's a tough question, but I think visiting Japan had the most profound impact on me. The blend of tradition and modernity there is just fascinating. But I haven't even mentioned how meeting new people really enhances the whole travel experience. \nA: It must be wonderful to be able to see the world like that. \nB: It is. I highly recommend it if you ever get the chance. \nA: I'm definitely going to try to make it happen. Thank you for talking with me about it.",
464
+ "clean_dialogue": "A: So, you've traveled a lot? \nB: Yes, I have. I've been to all sorts of interesting places, from tropical islands to snowy mountain ranges. \nA: Sorry to interrupt, but when you say \"all sorts of places,\" do you mean different countries or also different kinds of environments, like cities versus rural areas? \nB:Both, actually. I've visited various countries and experienced everything from bustling cities to remote villages. It's definitely a great experience. You get to see so many different cultures and meet new people, each with their own unique stories and perspectives that can completely change how you view the world and your place in it.\nA: Speaking of cultures, which one left the biggest impression on you? \nB: That's a tough question, but I think visiting Japan had the most profound impact on me. The blend of tradition and modernity there is just fascinating. But I haven't even mentioned how meeting new people really enhances the whole travel experience. \nA: It must be wonderful to be able to see the world like that. \nB: It is. I highly recommend it if you ever get the chance. \nA: I'm definitely going to try to make it happen. Thank you for talking with me about it.",
465
+ "speaker_tracks": {
466
+ "A": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--424960/A_track.wav",
467
+ "B": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--424960/B_track.wav"
468
+ },
469
+ "error_type": "error_after_interrupt",
470
+ "stereo_audio": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--424960/stereo_dialogue.wav",
471
+ "total_duration": 61.32780045351474,
472
+ "segments": [
473
+ {
474
+ "speaker": "A",
475
+ "text": "So, you've traveled a lot?",
476
+ "original_text": "So, you've traveled a lot?",
477
+ "start_time": 0,
478
+ "end_time": 1.474467120181406,
479
+ "audio_file": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--424960/temp/line_0_A.wav",
480
+ "silence_duration": 0,
481
+ "is_interrupted": false
482
+ },
483
+ {
484
+ "speaker": "B",
485
+ "text": "Yes, I have. I've been to all sorts of interesting places, from tropical islands to snowy mountain ranges.",
486
+ "original_text": "Yes, I have. I've been to all sorts of interesting places, from tropical islands to snowy mountain ranges.",
487
+ "start_time": 2.068172914767877,
488
+ "end_time": 7.826721667602344,
489
+ "audio_file": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--424960/temp/line_1_B.wav",
490
+ "silence_duration": 0.5937057945864714,
491
+ "is_interrupted": false
492
+ },
493
+ {
494
+ "speaker": "A",
495
+ "text": "Sorry to interrupt, but when you say \"all sorts of places,\" do you mean different countries or also different kinds of environments, like cities versus rural areas?",
496
+ "original_text": "Sorry to interrupt, but when you say \"all sorts of places,\" do you mean different countries or also different kinds of environments, like cities versus rural areas?",
497
+ "start_time": 8.340391189370486,
498
+ "end_time": 18.67327100796459,
499
+ "audio_file": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--424960/temp/line_2_A.wav",
500
+ "silence_duration": 0.5136695217681417,
501
+ "is_interrupted": false
502
+ },
503
+ {
504
+ "speaker": "B",
505
+ "text": "Both, actually. I've visited various countries and experienced everything from bustling cities to remote villages. It's definitely a great experience. You get to see so many different cultures and meet new",
506
+ "original_text": "Both, actually. I've visited various countries and experienced everything from bustling cities to remote villages. It's definitely a great experience. You get to see so many different cultures and meet new [interrupt] people, each with their own unique stories and perspectives that can completely change how you view the world and your place in it.",
507
+ "start_time": 19.06107440491716,
508
+ "end_time": 36.000031321016934,
509
+ "audio_file": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--424960/temp/line_3_B.wav",
510
+ "silence_duration": 0.38780339695257027,
511
+ "is_interrupted": true,
512
+ "text_after_interrupt": "people, each with their own unique stories and perspectives that can completely change how you view the world and your place in it."
513
+ },
514
+ {
515
+ "speaker": "A",
516
+ "text": "Speaking of cultures, which one left the biggest impression on you?",
517
+ "original_text": "Speaking of cultures, which one left the biggest impression on you?",
518
+ "start_time": 29.660983701969315,
519
+ "end_time": 33.329736536436435,
520
+ "audio_file": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--424960/temp/line_4_A.wav",
521
+ "silence_duration": 0.5959806021294678,
522
+ "is_interrupted": false
523
+ },
524
+ {
525
+ "speaker": "B",
526
+ "text": "That's a tough question, but I think visiting Japan had the most profound impact on me. The blend of tradition and modernity there is just fascinating. But I haven't even mentioned how meeting new people really enhances the whole travel experience.",
527
+ "original_text": "That's a tough question, but I think visiting Japan had the most profound impact on me. The blend of tradition and modernity there is just fascinating. But I haven't even mentioned how meeting new people really enhances the whole travel experience.",
528
+ "start_time": 36.35174674763023,
529
+ "end_time": 48.86730230318579,
530
+ "audio_file": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--424960/temp/line_5_B.wav",
531
+ "silence_duration": 0.3517154266132969,
532
+ "is_interrupted": false
533
+ },
534
+ {
535
+ "speaker": "A",
536
+ "text": "It must be wonderful to be able to see the world like that.",
537
+ "original_text": "It must be wonderful to be able to see the world like that.",
538
+ "start_time": 49.35988001043792,
539
+ "end_time": 52.57584372925878,
540
+ "audio_file": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--424960/temp/line_6_A.wav",
541
+ "silence_duration": 0.49257770725213246,
542
+ "is_interrupted": false
543
+ },
544
+ {
545
+ "speaker": "B",
546
+ "text": "It is. I highly recommend it if you ever get the chance.",
547
+ "original_text": "It is. I highly recommend it if you ever get the chance.",
548
+ "start_time": 53.04658029347679,
549
+ "end_time": 56.18127417102781,
550
+ "audio_file": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--424960/temp/line_7_B.wav",
551
+ "silence_duration": 0.4707365642180112,
552
+ "is_interrupted": false
553
+ },
554
+ {
555
+ "speaker": "A",
556
+ "text": "I'm definitely going to try to make it happen. Thank you for talking with me about it.",
557
+ "original_text": "I'm definitely going to try to make it happen. Thank you for talking with me about it.",
558
+ "start_time": 56.57932539417633,
559
+ "end_time": 61.32780611979991,
560
+ "audio_file": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--424960/temp/line_8_A.wav",
561
+ "silence_duration": 0.39805122314851726,
562
+ "is_interrupted": false
563
+ }
564
+ ]
565
+ }
566
+ }
ms-swift/silence_overlaps/test/test.json ADDED
@@ -0,0 +1,566 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "SODA_PROCESSED--train--790538": {
3
+ "original_text": "A: Hey Val, are you all packed for our camping trip?\nB: Yeah, I think so. I have the tents, food, and clothes. Did you remember to pack your bag?\nA: Yes, I did. I have everything we need. Are you sure you're ready for this? It's going to be a lot of [interrupt] fun, but also quite challenging with all the hiking and outdoor activities we have planned, especially since neither of us has much wilderness experience.\nB: Actually, I just realized I might have forgotten the matches. Do you have any in your bag?\nA: Oh, I didn't check for matches. I'll make sure to pack them. So, you were saying you were looking forward to it?\nB: Yeah, I'm ready. I've been looking forward to it for a while. Thanks for planning it all out.\nA: No problem. I know we both needed a break from work and life in general. It'll be nice to relax in nature for a few days and just unwind.\nB: Speaking of relaxing, did we decide on any specific activities to do while we're there?\nA: I was thinking we could go hiking and maybe do some fishing. But I haven't mentioned the exact schedule yet.\nB: Definitely. So, what time are we leaving tomorrow morning?\nA: Around 9am should be good. That will give us enough time to get there and set up camp before it gets dark.\nB: Sounds perfect. See you then!",
4
+ "cleaned_text": "A: Hey Val, are you all packed for our camping trip?\nB: Yeah, I think so. I have the tents, food, and clothes. Did you remember to pack your bag?\nA:Yes, I did. I have everything we need. Are you sure you're ready for this? It's going to be a lot of fun, but also quite challenging with all the hiking and outdoor activities we have planned, especially since neither of us has much wilderness experience.\nB: Actually, I just realized I might have forgotten the matches. Do you have any in your bag?\nA: Oh, I didn't check for matches. I'll make sure to pack them. So, you were saying you were looking forward to it?\nB: Yeah, I'm ready. I've been looking forward to it for a while. Thanks for planning it all out.\nA: No problem. I know we both needed a break from work and life in general. It'll be nice to relax in nature for a few days and just unwind.\nB: Speaking of relaxing, did we decide on any specific activities to do while we're there?\nA: I was thinking we could go hiking and maybe do some fishing. But I haven't mentioned the exact schedule yet.\nB: Definitely. So, what time are we leaving tomorrow morning?\nA: Around 9am should be good. That will give us enough time to get there and set up camp before it gets dark.\nB: Sounds perfect. See you then!",
5
+ "total_duration": 70.17238095238095,
6
+ "stereo_audio": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--790538/stereo_dialogue.wav",
7
+ "speaker_tracks": {
8
+ "A": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--790538/A_track.wav",
9
+ "B": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--790538/B_track.wav"
10
+ },
11
+ "error_type": "error_after_interrupt",
12
+ "segments": [
13
+ {
14
+ "speaker": "A",
15
+ "text": "Hey Val, are you all packed for our camping trip?",
16
+ "original_text": "Hey Val, are you all packed for our camping trip?",
17
+ "start_time": 0,
18
+ "end_time": 3.355283446712018,
19
+ "audio_file": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--790538/temp/line_0_A.wav",
20
+ "silence_duration": 0,
21
+ "is_interrupted": false
22
+ },
23
+ {
24
+ "speaker": "B",
25
+ "text": "Yeah, I think so. I have the tents, food, and clothes. Did you remember to pack your bag?",
26
+ "original_text": "Yeah, I think so. I have the tents, food, and clothes. Did you remember to pack your bag?",
27
+ "start_time": 3.7205840462203787,
28
+ "end_time": 8.2368652253587,
29
+ "audio_file": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--790538/temp/line_1_B.wav",
30
+ "silence_duration": 0.3653005995083607,
31
+ "is_interrupted": false
32
+ },
33
+ {
34
+ "speaker": "A",
35
+ "text": "Yes, I did. I have everything we need. Are you sure you're ready for this? It's going to be a lot of",
36
+ "original_text": "Yes, I did. I have everything we need. Are you sure you're ready for this? It's going to be a lot of [interrupt] fun, but also quite challenging with all the hiking and outdoor activities we have planned, especially since neither of us has much wilderness experience.",
37
+ "start_time": 8.610541776107988,
38
+ "end_time": 23.808002093568305,
39
+ "audio_file": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--790538/temp/line_2_A.wav",
40
+ "silence_duration": 0.37367655074928696,
41
+ "is_interrupted": true,
42
+ "text_after_interrupt": "fun, but also quite challenging with all the hiking and outdoor activities we have planned, especially since neither of us has much wilderness experience."
43
+ },
44
+ {
45
+ "speaker": "B",
46
+ "text": "Actually, I just realized I might have forgotten the matches. Do you have any in your bag?",
47
+ "original_text": "Actually, I just realized I might have forgotten the matches. Do you have any in your bag?",
48
+ "start_time": 15.588138147990074,
49
+ "end_time": 19.686460143454926,
50
+ "audio_file": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--790538/temp/line_3_B.wav",
51
+ "silence_duration": 0.407293268908306,
52
+ "is_interrupted": false
53
+ },
54
+ {
55
+ "speaker": "A",
56
+ "text": "Oh, I didn't check for matches. I'll make sure to pack them. So, you were saying you were looking forward to it?",
57
+ "original_text": "Oh, I didn't check for matches. I'll make sure to pack them. So, you were saying you were looking forward to it?",
58
+ "start_time": 24.198980395182797,
59
+ "end_time": 30.189728694502527,
60
+ "audio_file": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--790538/temp/line_4_A.wav",
61
+ "silence_duration": 0.39097830161449093,
62
+ "is_interrupted": false
63
+ },
64
+ {
65
+ "speaker": "B",
66
+ "text": "Yeah, I'm ready. I've been looking forward to it for a while. Thanks for planning it all out.",
67
+ "original_text": "Yeah, I'm ready. I've been looking forward to it for a while. Thanks for planning it all out.",
68
+ "start_time": 30.59422649262823,
69
+ "end_time": 35.60973669670986,
70
+ "audio_file": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--790538/temp/line_5_B.wav",
71
+ "silence_duration": 0.4044977981257025,
72
+ "is_interrupted": false
73
+ },
74
+ {
75
+ "speaker": "A",
76
+ "text": "No problem. I know we both needed a break from work and life in general. It'll be nice to relax in nature for a few days and just unwind.",
77
+ "original_text": "No problem. I know we both needed a break from work and life in general. It'll be nice to relax in nature for a few days and just unwind.",
78
+ "start_time": 36.08564285355298,
79
+ "end_time": 45.52455441817884,
80
+ "audio_file": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--790538/temp/line_6_A.wav",
81
+ "silence_duration": 0.47590615684312176,
82
+ "is_interrupted": false
83
+ },
84
+ {
85
+ "speaker": "B",
86
+ "text": "Speaking of relaxing, did we decide on any specific activities to do while we're there?",
87
+ "original_text": "Speaking of relaxing, did we decide on any specific activities to do while we're there?",
88
+ "start_time": 45.86997259942616,
89
+ "end_time": 50.33981386926743,
90
+ "audio_file": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--790538/temp/line_7_B.wav",
91
+ "silence_duration": 0.34541818124732176,
92
+ "is_interrupted": false
93
+ },
94
+ {
95
+ "speaker": "A",
96
+ "text": "I was thinking we could go hiking and maybe do some fishing. But I haven't mentioned the exact schedule yet.",
97
+ "original_text": "I was thinking we could go hiking and maybe do some fishing. But I haven't mentioned the exact schedule yet.",
98
+ "start_time": 50.65173673057719,
99
+ "end_time": 56.66570498454544,
100
+ "audio_file": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--790538/temp/line_8_A.wav",
101
+ "silence_duration": 0.3119228613097595,
102
+ "is_interrupted": false
103
+ },
104
+ {
105
+ "speaker": "B",
106
+ "text": "Definitely. So, what time are we leaving tomorrow morning?",
107
+ "original_text": "Definitely. So, what time are we leaving tomorrow morning?",
108
+ "start_time": 57.18372257432499,
109
+ "end_time": 60.84086543146785,
110
+ "audio_file": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--790538/temp/line_9_B.wav",
111
+ "silence_duration": 0.5180175897795466,
112
+ "is_interrupted": false
113
+ },
114
+ {
115
+ "speaker": "A",
116
+ "text": "Around 9am should be good. That will give us enough time to get there and set up camp before it gets dark.",
117
+ "original_text": "Around 9am should be good. That will give us enough time to get there and set up camp before it gets dark.",
118
+ "start_time": 61.21524274027814,
119
+ "end_time": 67.84453979243234,
120
+ "audio_file": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--790538/temp/line_10_A.wav",
121
+ "silence_duration": 0.37437730881029474,
122
+ "is_interrupted": false
123
+ },
124
+ {
125
+ "speaker": "B",
126
+ "text": "Sounds perfect. See you then!",
127
+ "original_text": "Sounds perfect. See you then!",
128
+ "start_time": 68.44252417070729,
129
+ "end_time": 70.17241079202248,
130
+ "audio_file": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--790538/temp/line_11_B.wav",
131
+ "silence_duration": 0.5979843782749388,
132
+ "is_interrupted": false
133
+ }
134
+ ]
135
+ },
136
+ "SODA_PROCESSED--train--123906": {
137
+ "original_text": "A: You know, it's really annoying when things are loose and [interrupt] they just don't stay in place like they're supposed to, especially when you're in the middle of an important task and everything keeps falling apart because of poor craftsmanship.\n\nB: Oh, absolutely! Like when you try to hang something, and it just won't stay in place. Why can't they just be tight and sturdy?\n\nA: Exactly! It drives me crazy when I see a loose screw or a crooked picture frame.\n\nB: Yeah, it's like they're just begging to be fixed. But you know, sometimes I wonder if people even notice these things or if it's just us.\n\nA: Right? And it's not like it's that hard to fix them, but people just don't bother. Or they don't have the time, or they don't know how. But it just drives me up the wall when things are out of place or not functioning properly.\n\nB: I totally get you. There's nothing worse than trying to use something and it's just not working right because someone didn't take the time to fix it properly. Speaking of which, do you think it's just laziness, or maybe they don't have the right tools?\n\nA: It could be either, honestly. But what's the point of having things if they're just going to be broken and dysfunctional?\n\nB: Yeah, it's really frustrating. Especially when it's something you need to use regularly.\n\nA: I know, I feel like I could go on about this forever. But I should probably get back to tightening this screw before it drives me even crazier.\n\nB: Yeah, you probably should. Thanks for fixing that, by the way.\n\nA: No problem. Just doing my part to keep things in working order around here.",
138
+ "cleaned_text": "A:You know, it's really annoying when things are loose and they just don't stay in place like they're supposed to, especially when you're in the middle of an important task and everything keeps falling apart because of poor craftsmanship.\n\nB: Oh, absolutely! Like when you try to hang something, and it just won't stay in place. Why can't they just be tight and sturdy?\n\nA: Exactly! It drives me crazy when I see a loose screw or a crooked picture frame.\n\nB: Yeah, it's like they're just begging to be fixed. But you know, sometimes I wonder if people even notice these things or if it's just us.\n\nA: Right? And it's not like it's that hard to fix them, but people just don't bother. Or they don't have the time, or they don't know how. But it just drives me up the wall when things are out of place or not functioning properly.\n\nB: I totally get you. There's nothing worse than trying to use something and it's just not working right because someone didn't take the time to fix it properly. Speaking of which, do you think it's just laziness, or maybe they don't have the right tools?\n\nA: It could be either, honestly. But what's the point of having things if they're just going to be broken and dysfunctional?\n\nB: Yeah, it's really frustrating. Especially when it's something you need to use regularly.\n\nA: I know, I feel like I could go on about this forever. But I should probably get back to tightening this screw before it drives me even crazier.\n\nB: Yeah, you probably should. Thanks for fixing that, by the way.\n\nA: No problem. Just doing my part to keep things in working order around here.",
139
+ "total_duration": 81.61818594104308,
140
+ "stereo_audio": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--123906/stereo_dialogue.wav",
141
+ "speaker_tracks": {
142
+ "A": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--123906/A_track.wav",
143
+ "B": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--123906/B_track.wav"
144
+ },
145
+ "error_type": "error_after_interrupt",
146
+ "segments": [
147
+ {
148
+ "speaker": "A",
149
+ "text": "You know, it's really annoying when things are loose and",
150
+ "original_text": "You know, it's really annoying when things are loose and [interrupt] they just don't stay in place like they're supposed to, especially when you're in the middle of an important task and everything keeps falling apart because of poor craftsmanship.",
151
+ "start_time": 0,
152
+ "end_time": 13.293424036281179,
153
+ "audio_file": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--123906/temp/line_0_A.wav",
154
+ "silence_duration": 0,
155
+ "is_interrupted": true,
156
+ "text_after_interrupt": "they just don't stay in place like they're supposed to, especially when you're in the middle of an important task and everything keeps falling apart because of poor craftsmanship."
157
+ },
158
+ {
159
+ "speaker": "B",
160
+ "text": "Oh, absolutely! Like when you try to hang something, and it just won't stay in place. Why can't they just be tight and sturdy?",
161
+ "original_text": "Oh, absolutely! Like when you try to hang something, and it just won't stay in place. Why can't they just be tight and sturdy?",
162
+ "start_time": 3.250793650793651,
163
+ "end_time": 9.961360544217687,
164
+ "audio_file": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--123906/temp/line_1_B.wav",
165
+ "silence_duration": 0.5919206270367532,
166
+ "is_interrupted": false
167
+ },
168
+ {
169
+ "speaker": "A",
170
+ "text": "Exactly! It drives me crazy when I see a loose screw or a crooked picture frame.",
171
+ "original_text": "Exactly! It drives me crazy when I see a loose screw or a crooked picture frame.",
172
+ "start_time": 13.747669310225408,
173
+ "end_time": 19.123088811359196,
174
+ "audio_file": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--123906/temp/line_2_A.wav",
175
+ "silence_duration": 0.4542452739442295,
176
+ "is_interrupted": false
177
+ },
178
+ {
179
+ "speaker": "B",
180
+ "text": "Yeah, it's like they're just begging to be fixed. But you know, sometimes I wonder if people even notice these things or if it's just us.",
181
+ "original_text": "Yeah, it's like they're just begging to be fixed. But you know, sometimes I wonder if people even notice these things or if it's just us.",
182
+ "start_time": 19.55223911029212,
183
+ "end_time": 26.39051575428305,
184
+ "audio_file": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--123906/temp/line_3_B.wav",
185
+ "silence_duration": 0.4291502989329248,
186
+ "is_interrupted": false
187
+ },
188
+ {
189
+ "speaker": "A",
190
+ "text": "Right? And it's not like it's that hard to fix them, but people just don't bother. Or they don't have the time, or they don't know how. But it just drives me up the wall when things are out of place or not functioning properly.",
191
+ "original_text": "Right? And it's not like it's that hard to fix them, but people just don't bother. Or they don't have the time, or they don't know how. But it just drives me up the wall when things are out of place or not functioning properly.",
192
+ "start_time": 26.849330263466445,
193
+ "end_time": 39.388105773670524,
194
+ "audio_file": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--123906/temp/line_4_A.wav",
195
+ "silence_duration": 0.45881450918339584,
196
+ "is_interrupted": false
197
+ },
198
+ {
199
+ "speaker": "B",
200
+ "text": "I totally get you. There's nothing worse than trying to use something and it's just not working right because someone didn't take the time to fix it properly. Speaking of which, do you think it's just laziness, or maybe they don't have the right tools?",
201
+ "original_text": "I totally get you. There's nothing worse than trying to use something and it's just not working right because someone didn't take the time to fix it properly. Speaking of which, do you think it's just laziness, or maybe they don't have the right tools?",
202
+ "start_time": 39.82405615117125,
203
+ "end_time": 51.73589288586513,
204
+ "audio_file": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--123906/temp/line_5_B.wav",
205
+ "silence_duration": 0.4359503775007275,
206
+ "is_interrupted": false
207
+ },
208
+ {
209
+ "speaker": "A",
210
+ "text": "It could be either, honestly. But what's the point of having things if they're just going to be broken and dysfunctional?",
211
+ "original_text": "It could be either, honestly. But what's the point of having things if they're just going to be broken and dysfunctional?",
212
+ "start_time": 52.27556460896347,
213
+ "end_time": 58.811981842523565,
214
+ "audio_file": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--123906/temp/line_6_A.wav",
215
+ "silence_duration": 0.5396717230983441,
216
+ "is_interrupted": false
217
+ },
218
+ {
219
+ "speaker": "B",
220
+ "text": "Yeah, it's really frustrating. Especially when it's something you need to use regularly.",
221
+ "original_text": "Yeah, it's really frustrating. Especially when it's something you need to use regularly.",
222
+ "start_time": 59.3890926661041,
223
+ "end_time": 63.90537384524242,
224
+ "audio_file": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--123906/temp/line_7_B.wav",
225
+ "silence_duration": 0.5771108235805331,
226
+ "is_interrupted": false
227
+ },
228
+ {
229
+ "speaker": "A",
230
+ "text": "I know, I feel like I could go on about this forever. But I should probably get back to tightening this screw before it drives me even crazier.",
231
+ "original_text": "I know, I feel like I could go on about this forever. But I should probably get back to tightening this screw before it drives me even crazier.",
232
+ "start_time": 64.32218282146329,
233
+ "end_time": 72.3911170618261,
234
+ "audio_file": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--123906/temp/line_8_A.wav",
235
+ "silence_duration": 0.4168089762208619,
236
+ "is_interrupted": false
237
+ },
238
+ {
239
+ "speaker": "B",
240
+ "text": "Yeah, you probably should. Thanks for fixing that, by the way.",
241
+ "original_text": "Yeah, you probably should. Thanks for fixing that, by the way.",
242
+ "start_time": 72.7979076035892,
243
+ "end_time": 76.45505046073205,
244
+ "audio_file": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--123906/temp/line_9_B.wav",
245
+ "silence_duration": 0.4067905417630988,
246
+ "is_interrupted": false
247
+ },
248
+ {
249
+ "speaker": "A",
250
+ "text": "No problem. Just doing my part to keep things in working order around here.",
251
+ "original_text": "No problem. Just doing my part to keep things in working order around here.",
252
+ "start_time": 76.90457674986,
253
+ "end_time": 81.6182275435108,
254
+ "audio_file": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--123906/temp/line_10_A.wav",
255
+ "silence_duration": 0.4495262891279488,
256
+ "is_interrupted": false
257
+ }
258
+ ]
259
+ },
260
+ "SODA_PROCESSED--train--1112763": {
261
+ "original_dialog_id": "",
262
+ "dialog_index": 1112763,
263
+ "processed_dialogue": "A: Hey Sarah, what's up? \nB: Not much, what are you up to? \nA: Just standing in this pool of water, trying to escape the heat. It's really hot out [interrupt] and I was thinking about just staying in here for the rest of the day because the water is perfectly refreshing and the shade from these trees makes it even more comfortable. \nB: Sorry to cut in, but how hot is it exactly? I was thinking of going for a run later, but now I'm not so sure. \nA: Yeah, it's probably not the best idea to run today. It's around 95 degrees, and it feels even hotter with the humidity. The pool feels really good and it's helping me cool down especially since there's a slight breeze too that makes the whole experience feel like a mini vacation from this brutal summer weather. \nB: That sounds nice. I wish I could be in a pool right now too. \nA: Yeah, it's pretty great. I feel like I could stay here all day without getting bored because there's so much to enjoy - watching the clouds drift by, listening to the birds chirping, and feeling the gentle movement of the water around me. \nB: I bet. It must be really nice to just relax in the water and not have to worry about anything else. \nA: Yeah, it is. It's really peaceful and calming. I'm glad I decided to come out here and do this instead of staying indoors where I would just be sweating and miserable, staring at the same four walls all afternoon. \nB: Me too, Cleveland. Me too.",
264
+ "clean_dialogue": "A: Hey Sarah, what's up? \nB: Not much, what are you up to? \nA:Just standing in this pool of water, trying to escape the heat. It's really hot out and I was thinking about just staying in here for the rest of the day because the water is perfectly refreshing and the shade from these trees makes it even more comfortable.\nB: Sorry to cut in, but how hot is it exactly? I was thinking of going for a run later, but now I'm not so sure. \nA: Yeah, it's probably not the best idea to run today. It's around 95 degrees, and it feels even hotter with the humidity. The pool feels really good and it's helping me cool down especially since there's a slight breeze too that makes the whole experience feel like a mini vacation from this brutal summer weather. \nB: That sounds nice. I wish I could be in a pool right now too. \nA: Yeah, it's pretty great. I feel like I could stay here all day without getting bored because there's so much to enjoy - watching the clouds drift by, listening to the birds chirping, and feeling the gentle movement of the water around me. \nB: I bet. It must be really nice to just relax in the water and not have to worry about anything else. \nA: Yeah, it is. It's really peaceful and calming. I'm glad I decided to come out here and do this instead of staying indoors where I would just be sweating and miserable, staring at the same four walls all afternoon. \nB: Me too, Cleveland. Me too.",
265
+ "speaker_tracks": {
266
+ "A": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--1112763/A_track.wav",
267
+ "B": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--1112763/B_track.wav"
268
+ },
269
+ "error_type": "error_after_interrupt",
270
+ "stereo_audio": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--1112763/stereo_dialogue.wav",
271
+ "total_duration": 80.37741496598639,
272
+ "segments": [
273
+ {
274
+ "speaker": "A",
275
+ "text": "Hey Sarah, what's up?",
276
+ "original_text": "Hey Sarah, what's up?",
277
+ "start_time": 0,
278
+ "end_time": 1.6486167800453515,
279
+ "audio_file": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--1112763/temp/line_0_A.wav",
280
+ "silence_duration": 0,
281
+ "is_interrupted": false
282
+ },
283
+ {
284
+ "speaker": "B",
285
+ "text": "Not much, what are you up to?",
286
+ "original_text": "Not much, what are you up to?",
287
+ "start_time": 2.244027328872719,
288
+ "end_time": 3.497904879893127,
289
+ "audio_file": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--1112763/temp/line_1_B.wav",
290
+ "silence_duration": 0.5954105488273673,
291
+ "is_interrupted": false
292
+ },
293
+ {
294
+ "speaker": "A",
295
+ "text": "Just standing in this pool of water, trying to escape the heat. It's really hot out",
296
+ "original_text": "Just standing in this pool of water, trying to escape the heat. It's really hot out [interrupt] and I was thinking about just staying in here for the rest of the day because the water is perfectly refreshing and the shade from these trees makes it even more comfortable.",
297
+ "start_time": 3.9748179407694604,
298
+ "end_time": 18.847198893150413,
299
+ "audio_file": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--1112763/temp/line_2_A.wav",
300
+ "silence_duration": 0.47691306087633345,
301
+ "is_interrupted": true,
302
+ "text_after_interrupt": "and I was thinking about just staying in here for the rest of the day because the water is perfectly refreshing and the shade from these trees makes it even more comfortable."
303
+ },
304
+ {
305
+ "speaker": "B",
306
+ "text": "Sorry to cut in, but how hot is it exactly? I was thinking of going for a run later, but now I'm not so sure.",
307
+ "original_text": "Sorry to cut in, but how hot is it exactly? I was thinking of going for a run later, but now I'm not so sure.",
308
+ "start_time": 9.09481794076946,
309
+ "end_time": 15.352595718547239,
310
+ "audio_file": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--1112763/temp/line_3_B.wav",
311
+ "silence_duration": 0.5962546040921268,
312
+ "is_interrupted": false
313
+ },
314
+ {
315
+ "speaker": "A",
316
+ "text": "Yeah, it's probably not the best idea to run today. It's around 95 degrees, and it feels even hotter with the humidity. The pool feels really good and it's helping me cool down especially since there's a slight breeze too that makes the whole experience feel like a mini vacation from this brutal summer weather.",
317
+ "original_text": "Yeah, it's probably not the best idea to run today. It's around 95 degrees, and it feels even hotter with the humidity. The pool feels really good and it's helping me cool down especially since there's a slight breeze too that makes the whole experience feel like a mini vacation from this brutal summer weather.",
318
+ "start_time": 19.411937689342565,
319
+ "end_time": 39.3462687551022,
320
+ "audio_file": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--1112763/temp/line_4_A.wav",
321
+ "silence_duration": 0.5647387961921536,
322
+ "is_interrupted": false
323
+ },
324
+ {
325
+ "speaker": "B",
326
+ "text": "That sounds nice. I wish I could be in a pool right now too.",
327
+ "original_text": "That sounds nice. I wish I could be in a pool right now too.",
328
+ "start_time": 39.7352726618639,
329
+ "end_time": 42.986066312657556,
330
+ "audio_file": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--1112763/temp/line_5_B.wav",
331
+ "silence_duration": 0.3890039067616998,
332
+ "is_interrupted": false
333
+ },
334
+ {
335
+ "speaker": "A",
336
+ "text": "Yeah, it's pretty great. I feel like I could stay here all day without getting bored because there's so much to enjoy - watching the clouds drift by, listening to the birds chirping, and feeling the gentle movement of the water around me.",
337
+ "original_text": "Yeah, it's pretty great. I feel like I could stay here all day without getting bored because there's so much to enjoy - watching the clouds drift by, listening to the birds chirping, and feeling the gentle movement of the water around me.",
338
+ "start_time": 43.57081570786076,
339
+ "end_time": 58.803105957293866,
340
+ "audio_file": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--1112763/temp/line_6_A.wav",
341
+ "silence_duration": 0.5847493952032004,
342
+ "is_interrupted": false
343
+ },
344
+ {
345
+ "speaker": "B",
346
+ "text": "I bet. It must be really nice to just relax in the water and not have to worry about anything else.",
347
+ "original_text": "I bet. It must be really nice to just relax in the water and not have to worry about anything else.",
348
+ "start_time": 59.111149637610815,
349
+ "end_time": 63.94090020450424,
350
+ "audio_file": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--1112763/temp/line_7_B.wav",
351
+ "silence_duration": 0.3080436803169506,
352
+ "is_interrupted": false
353
+ },
354
+ {
355
+ "speaker": "A",
356
+ "text": "Yeah, it is. It's really peaceful and calming. I'm glad I decided to come out here and do this instead of staying indoors where I would just be sweating and miserable, staring at the same four walls all afternoon.",
357
+ "original_text": "Yeah, it is. It's really peaceful and calming. I'm glad I decided to come out here and do this instead of staying indoors where I would just be sweating and miserable, staring at the same four walls all afternoon.",
358
+ "start_time": 64.4324265657298,
359
+ "end_time": 78.10897985371166,
360
+ "audio_file": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--1112763/temp/line_8_A.wav",
361
+ "silence_duration": 0.4915263612255627,
362
+ "is_interrupted": false
363
+ },
364
+ {
365
+ "speaker": "B",
366
+ "text": "Me too, Cleveland. Me too.",
367
+ "original_text": "Me too, Cleveland. Me too.",
368
+ "start_time": 78.5430769468726,
369
+ "end_time": 80.37745336410616,
370
+ "audio_file": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--1112763/temp/line_9_B.wav",
371
+ "silence_duration": 0.4340970931609471,
372
+ "is_interrupted": false
373
+ }
374
+ ]
375
+ },
376
+ "SODA_PROCESSED--train--822773": {
377
+ "original_text": "A: Hey, Sarah! I'm so glad to see you!\nB: Hey, Renesmee! I'm happy to see you too! How are you doing today?\nA: I'm doing great today! I love exploring and playing games. Do you want to play a game of fetch with [interrupt] me in the park later? We could bring some tennis balls and maybe even invite some other friends to join us for an afternoon of fun activities.\nB: Oh, fetch sounds fun! But I was wondering, do you have any other favorite games we could try later?\nA: That sounds like fun! I would love to play a game of fetch with you. Maybe after that, we can try some other games too. Alright, let's go!\nB: Absolutely, let's start with fetch and then we can see what else we feel like doing!\nA: Renesmee and Sarah seem to be having a great time playing fetch together. Sarah is laughing and Renesmee looks like he's having a blast running around.",
378
+ "cleaned_text": "A: Hey, Sarah! I'm so glad to see you!\nB: Hey, Renesmee! I'm happy to see you too! How are you doing today?\nA:I'm doing great today! I love exploring and playing games. Do you want to play a game of fetch with me in the park later? We could bring some tennis balls and maybe even invite some other friends to join us for an afternoon of fun activities.\nB: Oh, fetch sounds fun! But I was wondering, do you have any other favorite games we could try later?\nA: That sounds like fun! I would love to play a game of fetch with you. Maybe after that, we can try some other games too. Alright, let's go!\nB: Absolutely, let's start with fetch and then we can see what else we feel like doing!\nA: Renesmee and Sarah seem to be having a great time playing fetch together. Sarah is laughing and Renesmee looks like he's having a blast running around.",
379
+ "total_duration": 45.90185941043084,
380
+ "stereo_audio": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--822773/stereo_dialogue.wav",
381
+ "speaker_tracks": {
382
+ "A": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--822773/A_track.wav",
383
+ "B": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--822773/B_track.wav"
384
+ },
385
+ "error_type": "error_after_interrupt",
386
+ "segments": [
387
+ {
388
+ "speaker": "A",
389
+ "text": "Hey, Sarah! I'm so glad to see you!",
390
+ "original_text": "Hey, Sarah! I'm so glad to see you!",
391
+ "start_time": 0,
392
+ "end_time": 2.7747845804988662,
393
+ "audio_file": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--822773/temp/line_0_A.wav",
394
+ "silence_duration": 0,
395
+ "is_interrupted": false
396
+ },
397
+ {
398
+ "speaker": "B",
399
+ "text": "Hey, Renesmee! I'm happy to see you too! How are you doing today?",
400
+ "original_text": "Hey, Renesmee! I'm happy to see you too! How are you doing today?",
401
+ "start_time": 3.1698846088737547,
402
+ "end_time": 7.268206604338607,
403
+ "audio_file": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--822773/temp/line_1_B.wav",
404
+ "silence_duration": 0.39510002837488867,
405
+ "is_interrupted": false
406
+ },
407
+ {
408
+ "speaker": "A",
409
+ "text": "I'm doing great today! I love exploring and playing games. Do you want to play a game of fetch with",
410
+ "original_text": "I'm doing great today! I love exploring and playing games. Do you want to play a game of fetch with [interrupt] me in the park later? We could bring some tennis balls and maybe even invite some other friends to join us for an afternoon of fun activities.",
411
+ "start_time": 7.859956363848243,
412
+ "end_time": 22.209888336637356,
413
+ "audio_file": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--822773/temp/line_2_A.wav",
414
+ "silence_duration": 0.5917497595096354,
415
+ "is_interrupted": true,
416
+ "text_after_interrupt": "me in the park later? We could bring some tennis balls and maybe even invite some other friends to join us for an afternoon of fun activities."
417
+ },
418
+ {
419
+ "speaker": "B",
420
+ "text": "Oh, fetch sounds fun! But I was wondering, do you have any other favorite games we could try later?",
421
+ "original_text": "Oh, fetch sounds fun! But I was wondering, do you have any other favorite games we could try later?",
422
+ "start_time": 14.558913279948015,
423
+ "end_time": 19.574423484029648,
424
+ "audio_file": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--822773/temp/line_3_B.wav",
425
+ "silence_duration": 0.35472772157833465,
426
+ "is_interrupted": false
427
+ },
428
+ {
429
+ "speaker": "A",
430
+ "text": "That sounds like fun! I would love to play a game of fetch with you. Maybe after that, we can try some other games too. Alright, let's go!",
431
+ "original_text": "That sounds like fun! I would love to play a game of fetch with you. Maybe after that, we can try some other games too. Alright, let's go!",
432
+ "start_time": 22.580509090185526,
433
+ "end_time": 31.798831085650377,
434
+ "audio_file": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--822773/temp/line_4_A.wav",
435
+ "silence_duration": 0.3706207535481718,
436
+ "is_interrupted": false
437
+ },
438
+ {
439
+ "speaker": "B",
440
+ "text": "Absolutely, let's start with fetch and then we can see what else we feel like doing!",
441
+ "original_text": "Absolutely, let's start with fetch and then we can see what else we feel like doing!",
442
+ "start_time": 32.37886103861385,
443
+ "end_time": 36.53523292070002,
444
+ "audio_file": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--822773/temp/line_5_B.wav",
445
+ "silence_duration": 0.5800299529634694,
446
+ "is_interrupted": false
447
+ },
448
+ {
449
+ "speaker": "A",
450
+ "text": "Renesmee and Sarah seem to be having a great time playing fetch together. Sarah is laughing and Renesmee looks like he's having a blast running around.",
451
+ "original_text": "Renesmee and Sarah seem to be having a great time playing fetch together. Sarah is laughing and Renesmee looks like he's having a blast running around.",
452
+ "start_time": 36.96218842583218,
453
+ "end_time": 45.90187096551472,
454
+ "audio_file": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--822773/temp/line_6_A.wav",
455
+ "silence_duration": 0.4269555051321636,
456
+ "is_interrupted": false
457
+ }
458
+ ]
459
+ },
460
+ "SODA_PROCESSED--train--424960": {
461
+ "original_dialog_id": "",
462
+ "dialog_index": 424960,
463
+ "processed_dialogue": "A: So, you've traveled a lot? \nB: Yes, I have. I've been to all sorts of interesting places, from tropical islands to snowy mountain ranges. \nA: Sorry to interrupt, but when you say \"all sorts of places,\" do you mean different countries or also different kinds of environments, like cities versus rural areas? \nB: Both, actually. I've visited various countries and experienced everything from bustling cities to remote villages. It's definitely a great experience. You get to see so many different cultures and meet new [interrupt] people, each with their own unique stories and perspectives that can completely change how you view the world and your place in it. \nA: Speaking of cultures, which one left the biggest impression on you? \nB: That's a tough question, but I think visiting Japan had the most profound impact on me. The blend of tradition and modernity there is just fascinating. But I haven't even mentioned how meeting new people really enhances the whole travel experience. \nA: It must be wonderful to be able to see the world like that. \nB: It is. I highly recommend it if you ever get the chance. \nA: I'm definitely going to try to make it happen. Thank you for talking with me about it.",
464
+ "clean_dialogue": "A: So, you've traveled a lot? \nB: Yes, I have. I've been to all sorts of interesting places, from tropical islands to snowy mountain ranges. \nA: Sorry to interrupt, but when you say \"all sorts of places,\" do you mean different countries or also different kinds of environments, like cities versus rural areas? \nB:Both, actually. I've visited various countries and experienced everything from bustling cities to remote villages. It's definitely a great experience. You get to see so many different cultures and meet new people, each with their own unique stories and perspectives that can completely change how you view the world and your place in it.\nA: Speaking of cultures, which one left the biggest impression on you? \nB: That's a tough question, but I think visiting Japan had the most profound impact on me. The blend of tradition and modernity there is just fascinating. But I haven't even mentioned how meeting new people really enhances the whole travel experience. \nA: It must be wonderful to be able to see the world like that. \nB: It is. I highly recommend it if you ever get the chance. \nA: I'm definitely going to try to make it happen. Thank you for talking with me about it.",
465
+ "speaker_tracks": {
466
+ "A": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--424960/A_track.wav",
467
+ "B": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--424960/B_track.wav"
468
+ },
469
+ "error_type": "error_after_interrupt",
470
+ "stereo_audio": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--424960/stereo_dialogue.wav",
471
+ "total_duration": 61.32780045351474,
472
+ "segments": [
473
+ {
474
+ "speaker": "A",
475
+ "text": "So, you've traveled a lot?",
476
+ "original_text": "So, you've traveled a lot?",
477
+ "start_time": 0,
478
+ "end_time": 1.474467120181406,
479
+ "audio_file": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--424960/temp/line_0_A.wav",
480
+ "silence_duration": 0,
481
+ "is_interrupted": false
482
+ },
483
+ {
484
+ "speaker": "B",
485
+ "text": "Yes, I have. I've been to all sorts of interesting places, from tropical islands to snowy mountain ranges.",
486
+ "original_text": "Yes, I have. I've been to all sorts of interesting places, from tropical islands to snowy mountain ranges.",
487
+ "start_time": 2.068172914767877,
488
+ "end_time": 7.826721667602344,
489
+ "audio_file": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--424960/temp/line_1_B.wav",
490
+ "silence_duration": 0.5937057945864714,
491
+ "is_interrupted": false
492
+ },
493
+ {
494
+ "speaker": "A",
495
+ "text": "Sorry to interrupt, but when you say \"all sorts of places,\" do you mean different countries or also different kinds of environments, like cities versus rural areas?",
496
+ "original_text": "Sorry to interrupt, but when you say \"all sorts of places,\" do you mean different countries or also different kinds of environments, like cities versus rural areas?",
497
+ "start_time": 8.340391189370486,
498
+ "end_time": 18.67327100796459,
499
+ "audio_file": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--424960/temp/line_2_A.wav",
500
+ "silence_duration": 0.5136695217681417,
501
+ "is_interrupted": false
502
+ },
503
+ {
504
+ "speaker": "B",
505
+ "text": "Both, actually. I've visited various countries and experienced everything from bustling cities to remote villages. It's definitely a great experience. You get to see so many different cultures and meet new",
506
+ "original_text": "Both, actually. I've visited various countries and experienced everything from bustling cities to remote villages. It's definitely a great experience. You get to see so many different cultures and meet new [interrupt] people, each with their own unique stories and perspectives that can completely change how you view the world and your place in it.",
507
+ "start_time": 19.06107440491716,
508
+ "end_time": 36.000031321016934,
509
+ "audio_file": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--424960/temp/line_3_B.wav",
510
+ "silence_duration": 0.38780339695257027,
511
+ "is_interrupted": true,
512
+ "text_after_interrupt": "people, each with their own unique stories and perspectives that can completely change how you view the world and your place in it."
513
+ },
514
+ {
515
+ "speaker": "A",
516
+ "text": "Speaking of cultures, which one left the biggest impression on you?",
517
+ "original_text": "Speaking of cultures, which one left the biggest impression on you?",
518
+ "start_time": 29.660983701969315,
519
+ "end_time": 33.329736536436435,
520
+ "audio_file": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--424960/temp/line_4_A.wav",
521
+ "silence_duration": 0.5959806021294678,
522
+ "is_interrupted": false
523
+ },
524
+ {
525
+ "speaker": "B",
526
+ "text": "That's a tough question, but I think visiting Japan had the most profound impact on me. The blend of tradition and modernity there is just fascinating. But I haven't even mentioned how meeting new people really enhances the whole travel experience.",
527
+ "original_text": "That's a tough question, but I think visiting Japan had the most profound impact on me. The blend of tradition and modernity there is just fascinating. But I haven't even mentioned how meeting new people really enhances the whole travel experience.",
528
+ "start_time": 36.35174674763023,
529
+ "end_time": 48.86730230318579,
530
+ "audio_file": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--424960/temp/line_5_B.wav",
531
+ "silence_duration": 0.3517154266132969,
532
+ "is_interrupted": false
533
+ },
534
+ {
535
+ "speaker": "A",
536
+ "text": "It must be wonderful to be able to see the world like that.",
537
+ "original_text": "It must be wonderful to be able to see the world like that.",
538
+ "start_time": 49.35988001043792,
539
+ "end_time": 52.57584372925878,
540
+ "audio_file": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--424960/temp/line_6_A.wav",
541
+ "silence_duration": 0.49257770725213246,
542
+ "is_interrupted": false
543
+ },
544
+ {
545
+ "speaker": "B",
546
+ "text": "It is. I highly recommend it if you ever get the chance.",
547
+ "original_text": "It is. I highly recommend it if you ever get the chance.",
548
+ "start_time": 53.04658029347679,
549
+ "end_time": 56.18127417102781,
550
+ "audio_file": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--424960/temp/line_7_B.wav",
551
+ "silence_duration": 0.4707365642180112,
552
+ "is_interrupted": false
553
+ },
554
+ {
555
+ "speaker": "A",
556
+ "text": "I'm definitely going to try to make it happen. Thank you for talking with me about it.",
557
+ "original_text": "I'm definitely going to try to make it happen. Thank you for talking with me about it.",
558
+ "start_time": 56.57932539417633,
559
+ "end_time": 61.32780611979991,
560
+ "audio_file": "/root/autodl-tmp/output_matches_soda/SODA_PROCESSED--train--424960/temp/line_8_A.wav",
561
+ "silence_duration": 0.39805122314851726,
562
+ "is_interrupted": false
563
+ }
564
+ ]
565
+ }
566
+ }
ms-swift/swift/llm/dataset/__init__.py ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) Alibaba, Inc. and its affiliates.
2
+ import inspect
3
+
4
+ import datasets.fingerprint
5
+ from datasets import Dataset as HfDataset
6
+
7
+ from ..utils import get_temporary_cache_files_directory
8
+ from . import dataset
9
+ from .loader import DATASET_TYPE, load_dataset
10
+ from .media import MediaResource
11
+ from .preprocessor import (AlpacaPreprocessor, AutoPreprocessor, MessagesPreprocessor, ResponsePreprocessor,
12
+ RowPreprocessor)
13
+ from .register import DATASET_MAPPING, DatasetMeta, SubsetDataset, register_dataset, register_dataset_info
14
+ from .utils import (EncodePreprocessor, GetLengthPreprocessor, IterablePackingDataset, LazyLLMDataset, PackingDataset,
15
+ sample_dataset)
16
+
17
# Keep a handle on the `datasets` implementation of update_fingerprint as it exists at import time,
# so that the wrapper defined in this module can delegate to it after the name is re-bound.
+ update_fingerprint_origin = datasets.fingerprint.update_fingerprint
18
+
19
+
20
def update_fingerprint(fingerprint, transform, transform_args):
    """Fingerprint a transform, additionally hashing the source code of its ``function`` argument.

    When ``transform_args`` contains a ``'function'`` entry, that entry is replaced
    (in a shallow copy of the mapping) by a ``(function, source_code)`` tuple before
    delegating to ``update_fingerprint_origin``. For objects exposing ``__self__``
    (e.g. bound methods) the source of ``__self__.__class__`` is used; otherwise the
    source of the callable itself is used.

    If the source cannot be retrieved, the arguments are forwarded unchanged instead
    of raising.

    Args:
        fingerprint: Current fingerprint; forwarded unchanged.
        transform: Transform identifier; forwarded unchanged.
        transform_args: Mapping of the transform's arguments. It is never mutated, so
            the same mapping can safely be passed to this function more than once.

    Returns:
        The value returned by ``update_fingerprint_origin``.
    """
    if 'function' in transform_args:
        function = transform_args['function']
        # Calculate the hash using the source code.
        if hasattr(function, '__self__'):
            target = function.__self__.__class__
        else:
            target = function
        try:
            source = inspect.getsource(target)
        except (OSError, TypeError):
            # inspect.getsource raises TypeError for builtins and for objects that are not a
            # module/class/function (functools.partial, callable instances, ...), and
            # OSError when the source text is unavailable. Fall back to the unmodified arguments.
            source = None
        if source is not None:
            # Work on a copy: mutating the caller's dict would turn 'function' into a tuple,
            # and a later call with the same dict would then fail inside inspect.getsource.
            transform_args = dict(transform_args)
            transform_args['function'] = (function, source)
    return update_fingerprint_origin(fingerprint, transform, transform_args)
29
+
30
+
31
# Import-time monkey-patching of the `datasets` package.
# Re-bind `update_fingerprint` in both modules to the wrapper defined in this module.
# NOTE(review): presumably both modules hold their own reference to the name, which is why
# each one is patched separately - confirm against the installed `datasets` version.
+ datasets.fingerprint.update_fingerprint = update_fingerprint
32
+ datasets.arrow_dataset.update_fingerprint = update_fingerprint
33
# Likewise re-bind `get_temporary_cache_files_directory` in both modules to the
# implementation imported from `..utils`.
+ datasets.fingerprint.get_temporary_cache_files_directory = get_temporary_cache_files_directory
34
+ datasets.arrow_dataset.get_temporary_cache_files_directory = get_temporary_cache_files_directory
35
# Called once when this package is imported; its behaviour is defined in `.register`
# (not visible here) - presumably it populates DATASET_MAPPING, TODO confirm.
+ register_dataset_info()
ms-swift/swift/llm/dataset/__pycache__/loader.cpython-310.pyc ADDED
Binary file (15.9 kB). View file
 
ms-swift/swift/llm/dataset/__pycache__/media.cpython-310.pyc ADDED
Binary file (5 kB). View file
 
ms-swift/swift/llm/dataset/dataset/__init__.py ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ # Copyright (c) Alibaba, Inc. and its affiliates.
2
+ from . import llm, mllm
ms-swift/swift/llm/dataset/dataset/__pycache__/llm.cpython-310.pyc ADDED
Binary file (23.9 kB). View file
 
ms-swift/swift/llm/dataset/preprocessor/extra.py ADDED
@@ -0,0 +1,112 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) Alibaba, Inc. and its affiliates.
2
+ from typing import Any, Dict, List, Optional
3
+
4
+ import numpy as np
5
+
6
+ from .core import ResponsePreprocessor
7
+
8
+
9
class GroundingMixin:
    """Mixin that supplies randomly selected prompt templates for grounding tasks.

    ``task_type`` selects the template family (``'grounding'`` or ``'caption'``).
    Each template is a ``(query, response)`` pair containing the placeholders
    ``<ref-object>`` and ``<bbox>``.
    """
    task_type: Optional[str] = None

    # Sampling probabilities for the prompt language, in the order ('en', 'zh').
    _grounding_language_mixin = [0.8, 0.2]
    _grounding_prompts = {
        'grounding': {
            'en': [('<ref-object>', '<bbox>'), ('The positions of <ref-object> is', '<bbox>'),
                   ('Find the positions of <ref-object>', '<bbox>'), ('Where is <ref-object>', '<bbox>'),
                   ('Find <ref-object>', '<bbox>'), ('Show me <ref-object>', '<bbox>'),
                   ('Detect <ref-object>', '<bbox>'), ('Locate <ref-object>', '<bbox>'),
                   ('Tell me the location of <ref-object>', '<bbox>'), ('Give the location of <ref-object>', '<bbox>'),
                   ('Provide the bounding box coordinate of <ref-object>', '<bbox>')],
            'zh': [('<ref-object>', '<bbox>'), ('<ref-object>的位置在图片中', '<bbox>'), ('<ref-object>在图片中', '<bbox>'),
                   ('<ref-object>在', '<bbox>'), ('找到<ref-object>的位置', '<bbox>'), ('<ref-object>在哪里', '<bbox>'),
                   ('提供<ref-object>的坐标位置', '<bbox>')]
        },
        'caption': {
            'en': [
                ('<bbox>', '<ref-object>'),
                ('The object at position <bbox>', '<ref-object>'),
                ('This <bbox> is', '<ref-object>'),
                ('What is the object at <bbox>', '<ref-object>'),
                ('Describe <bbox>', '<ref-object>'),
                ('<bbox> is', '<ref-object>'),
                ('The bounding box coordinate <bbox> contains', '<ref-object>'),
            ],
            'zh': [
                ('<bbox>', '<ref-object>'),
                ('<bbox>是什么', '<ref-object>'),
                ('<bbox>的位置包含', '<ref-object>'),
                ('描述<bbox>', '<ref-object>'),
                ('<bbox>中是', '<ref-object>'),
                ('坐标<bbox>描述了什么', '<ref-object>'),
                ('描述<bbox>中的事物', '<ref-object>'),
            ]
        },
    }

    def construct_grounding_prompt(self):
        """Return a random ``(query, response)`` template for ``self.task_type``.

        The language is sampled with the probabilities stored in
        ``_grounding_language_mixin`` (previously the same values were
        hard-coded here, so overriding the attribute had no effect); the
        template is then drawn uniformly from that language's list.

        Raises:
            KeyError: if ``self.task_type`` is not a key of ``_grounding_prompts``.
        """
        # TODO Only support one bbox to one object
        lang = np.random.choice(['en', 'zh'], p=self._grounding_language_mixin)
        prompts = GroundingMixin._grounding_prompts[self.task_type][lang]
        query, response = prompts[np.random.choice(range(len(prompts)))]
        return query, response
54
+
55
+
56
class TextGenerationPreprocessor(ResponsePreprocessor):
    """Embed each row's query into a fixed prompt template before response preprocessing."""

    def __init__(self,
                 *,
                 prompt: str,
                 query_tag: str = '{{QUERY}}',
                 columns: Optional[Dict[str, str]] = None,
                 **kwargs) -> None:
        """Store the template and the tag that marks where the query is inserted."""
        self.prompt = prompt
        self.query_tag = query_tag
        super().__init__(columns=columns, **kwargs)

    def preprocess(self, row: Dict[str, Any]) -> Dict[str, Any]:
        """Replace ``query_tag`` in the template with ``row['query']`` and delegate to the parent."""
        filled_prompt = self.prompt.replace(self.query_tag, row['query'])
        row['query'] = filled_prompt
        return super().preprocess(row)
71
+
72
+
73
class ClsGenerationPreprocessor(ResponsePreprocessor):
    """Turn a classification row into a text-generation (query/response) sample.

    The query is a prompt listing the task, the input sentence(s) and the
    candidate categories; the response is the label text ``labels[int(label)]``.
    """

    def __init__(self,
                 labels: List[str],
                 *,
                 task: str,
                 is_pair_seq: bool = False,
                 columns: Optional[Dict[str, str]] = None,
                 **kwargs) -> None:
        """Build the prompt template.

        Args:
            labels: Label texts, indexed by the integer label stored in each row.
            task: Task description placed on the first prompt line.
            is_pair_seq: If True rows provide ``sentence1``/``sentence2``,
                otherwise a single ``sentence``.
            columns: Forwarded to the parent preprocessor.
        """
        self.labels = labels
        self.task = task
        self.is_pair_seq = is_pair_seq

        self.sentence2_key = 'sentence2'
        self.label_key = 'label'
        if is_pair_seq:
            self.sentence_key = 'sentence1'
            inputs = 'Sentence1: {sentence1}\nSentence2: {sentence2}'
        else:
            self.sentence_key = 'sentence'
            inputs = 'Sentence: {sentence}'
        # ``self.prompt`` is consumed by ``str.format`` in ``preprocess``. Literal
        # braces coming from ``task`` or ``labels`` must be doubled, otherwise
        # they are parsed as replacement fields and ``format`` raises.
        task_text = self._escape_braces(str(task))
        category = self._escape_braces(', '.join(labels))
        self.prompt = f'Task: {task_text}\n{inputs}\nCategory: {category}\nOutput:'
        super().__init__(columns=columns, **kwargs)

    @staticmethod
    def _escape_braces(text: str) -> str:
        """Double ``{`` and ``}`` so ``str.format`` emits them literally."""
        return text.replace('{', '{{').replace('}', '}}')

    def preprocess(self, row: Dict[str, Any]) -> Optional[Dict[str, Any]]:
        """Build ``query``/``response`` for one row; return None when the row has no label."""
        label = row.pop(self.label_key, None)
        if label is None:
            return None

        if self.is_pair_seq:
            query = self.prompt.format(sentence1=row.pop(self.sentence_key), sentence2=row.pop(self.sentence2_key))
        else:
            query = self.prompt.format(sentence=row.pop(self.sentence_key))
        row['query'] = query
        row['response'] = self.labels[int(label)]
        return super().preprocess(row)
ms-swift/swift/llm/ds_config/zero2.json ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "fp16": {
3
+ "enabled": "auto",
4
+ "loss_scale": 0,
5
+ "loss_scale_window": 1000,
6
+ "initial_scale_power": 16,
7
+ "hysteresis": 2,
8
+ "min_loss_scale": 1
9
+ },
10
+
11
+ "bf16": {
12
+ "enabled": "auto"
13
+ },
14
+
15
+ "zero_optimization": {
16
+ "stage": 2,
17
+ "offload_optimizer": {
18
+ "device": "none",
19
+ "pin_memory": true
20
+ },
21
+ "allgather_partitions": true,
22
+ "allgather_bucket_size": 2e8,
23
+ "overlap_comm": false,
24
+ "reduce_scatter": true,
25
+ "reduce_bucket_size": 2e8,
26
+ "contiguous_gradients": true
27
+ },
28
+
29
+ "gradient_accumulation_steps": "auto",
30
+ "gradient_clipping": "auto",
31
+ "steps_per_print": 2000,
32
+ "train_batch_size": "auto",
33
+ "train_micro_batch_size_per_gpu": "auto",
34
+ "wall_clock_breakdown": false
35
+ }
ms-swift/swift/llm/ds_config/zero3.json ADDED
@@ -0,0 +1,44 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "fp16": {
3
+ "enabled": "auto",
4
+ "loss_scale": 0,
5
+ "loss_scale_window": 1000,
6
+ "initial_scale_power": 16,
7
+ "hysteresis": 2,
8
+ "min_loss_scale": 1
9
+ },
10
+
11
+ "bf16": {
12
+ "enabled": "auto"
13
+ },
14
+
15
+ "zero_optimization": {
16
+ "stage": 3,
17
+ "offload_optimizer": {
18
+ "device": "none",
19
+ "pin_memory": true
20
+ },
21
+ "offload_param": {
22
+ "device": "none",
23
+ "pin_memory": true
24
+ },
25
+ "overlap_comm": false,
26
+ "contiguous_gradients": true,
27
+ "sub_group_size": 1e9,
28
+ "reduce_bucket_size": "auto",
29
+ "zero_quantized_weights": false,
30
+ "zero_quantized_gradients": false,
31
+ "stage3_prefetch_bucket_size": "auto",
32
+ "stage3_param_persistence_threshold": "auto",
33
+ "stage3_max_live_parameters": 1e9,
34
+ "stage3_max_reuse_distance": 1e9,
35
+ "stage3_gather_16bit_weights_on_model_save": true
36
+ },
37
+
38
+ "gradient_accumulation_steps": "auto",
39
+ "gradient_clipping": "auto",
40
+ "steps_per_print": 2000,
41
+ "train_batch_size": "auto",
42
+ "train_micro_batch_size_per_gpu": "auto",
43
+ "wall_clock_breakdown": false
44
+ }
ms-swift/swift/llm/ds_config/zero3_offload.json ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "fp16": {
3
+ "enabled": "auto",
4
+ "loss_scale": 0,
5
+ "loss_scale_window": 1000,
6
+ "initial_scale_power": 16,
7
+ "hysteresis": 2,
8
+ "min_loss_scale": 1
9
+ },
10
+
11
+ "bf16": {
12
+ "enabled": "auto"
13
+ },
14
+
15
+ "zero_optimization": {
16
+ "stage": 3,
17
+ "offload_optimizer": {
18
+ "device": "cpu",
19
+ "pin_memory": true
20
+ },
21
+ "offload_param": {
22
+ "device": "cpu",
23
+ "pin_memory": true
24
+ },
25
+ "overlap_comm": false,
26
+ "contiguous_gradients": true,
27
+ "sub_group_size": 1e9,
28
+ "reduce_bucket_size": "auto",
29
+ "stage3_prefetch_bucket_size": "auto",
30
+ "stage3_param_persistence_threshold": "auto",
31
+ "stage3_max_live_parameters": 1e9,
32
+ "stage3_max_reuse_distance": 1e9,
33
+ "stage3_gather_16bit_weights_on_model_save": true
34
+ },
35
+
36
+ "gradient_accumulation_steps": "auto",
37
+ "gradient_clipping": "auto",
38
+ "steps_per_print": 2000,
39
+ "train_batch_size": "auto",
40
+ "train_micro_batch_size_per_gpu": "auto",
41
+ "wall_clock_breakdown": false
42
+ }
ms-swift/swift/llm/eval/__init__.py ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ # Copyright (c) Alibaba, Inc. and its affiliates.
2
+ from .eval import SwiftEval, eval_main
ms-swift/swift/llm/eval/utils.py ADDED
@@ -0,0 +1,53 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from dataclasses import asdict
2
+ from typing import Any, Dict, List, Union
3
+
4
+ import torch.nn as nn
5
+ from evalscope.models.custom import CustomModel
6
+ from transformers import PreTrainedModel
7
+
8
+ from ..infer import PtEngine, RequestConfig
9
+ from ..template import InferRequest
10
+
11
+
12
class EvalModel(CustomModel):
    """Adapter exposing a model/template pair to evalscope through a ``PtEngine``."""

    def __init__(self, model: Union[PreTrainedModel, nn.Module], template, max_batch_size, model_name: str,
                 **kwargs) -> None:
        """Register the model id with the base class and build the inference engine."""
        super().__init__(config={'model_id': model_name}, **kwargs)
        self.model_name = model_name
        self.model = model
        self.template = template
        self.engine = PtEngine.from_model_template(model, template, max_batch_size=max_batch_size)

    def predict(self, prompts: List[dict], **kwargs) -> List[Dict[str, Any]]:
        """Run inference and return each response converted with ``dataclasses.asdict``.

        ``kwargs['origin_inputs']`` is used instead of ``prompts`` when present;
        ``kwargs['infer_cfg']`` is required and is expanded into a ``RequestConfig``.
        """
        # use origin inputs
        raw_inputs = kwargs.get('origin_inputs', prompts)
        infer_requests = self.prepare_inputs(raw_inputs)

        request_config = RequestConfig(**kwargs['infer_cfg'].copy())

        responses = self.engine.infer(infer_requests=infer_requests, request_config=request_config, use_tqdm=False)
        return [asdict(item) for item in responses]

    def prepare_inputs(self, prompts: Union[List[dict], List[str]]) -> List[InferRequest]:
        """Convert raw prompts (strings or dicts with a ``'data'`` list) into ``InferRequest`` objects."""
        return [self._to_infer_request(item) for item in prompts]

    @staticmethod
    def _to_infer_request(input_item) -> InferRequest:
        """Build the message list (optional system turn + user turn) for a single prompt."""
        if isinstance(input_item, str):
            query, system_prompt = input_item, None
        else:
            data: list = input_item['data']
            first = data[0]
            if isinstance(first, tuple):  # for truthful_qa and hellaswag
                query = '\n'.join(''.join(item) for item in data)
            else:
                query = first
            system_prompt = input_item.get('system_prompt', None)
        # prepare messages
        messages = [{'role': 'system', 'content': system_prompt}] if system_prompt else []
        messages.append({'role': 'user', 'content': query})
        return InferRequest(messages=messages)
ms-swift/swift/llm/export/__init__.py ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ # Copyright (c) Alibaba, Inc. and its affiliates.
2
+ from .export import SwiftExport, export_main
3
+ from .merge_lora import merge_lora
4
+ from .ollama import export_to_ollama
5
+ from .quant import quantize_model
ms-swift/swift/llm/infer/__init__.py ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) Alibaba, Inc. and its affiliates.
2
+ from typing import TYPE_CHECKING
3
+
4
+ from swift.utils.import_utils import _LazyModule
5
+
6
+ if TYPE_CHECKING:
7
+ from .infer import infer_main, SwiftInfer
8
+ from .rollout import rollout_main
9
+ from .deploy import deploy_main, SwiftDeploy, run_deploy
10
+ from .protocol import RequestConfig, Function
11
+ from .utils import prepare_model_template
12
+ from .infer_engine import (InferEngine, VllmEngine, LmdeployEngine, PtEngine, InferClient,
13
+ prepare_generation_config, AdapterRequest, BaseInferEngine)
14
+ else:
15
+ _import_structure = {
16
+ 'rollout': ['rollout_main'],
17
+ 'infer': ['infer_main', 'SwiftInfer'],
18
+ 'deploy': ['deploy_main', 'SwiftDeploy', 'run_deploy'],
19
+ 'protocol': ['RequestConfig', 'Function'],
20
+ 'utils': ['prepare_model_template'],
21
+ 'infer_engine': [
22
+ 'InferEngine', 'VllmEngine', 'LmdeployEngine', 'PtEngine', 'InferClient', 'prepare_generation_config',
23
+ 'AdapterRequest', 'BaseInferEngine'
24
+ ],
25
+ }
26
+
27
+ import sys
28
+
29
+ sys.modules[__name__] = _LazyModule(
30
+ __name__,
31
+ globals()['__file__'],
32
+ _import_structure,
33
+ module_spec=__spec__,
34
+ extra_objects={},
35
+ )
ms-swift/swift/llm/infer/infer_engine/__pycache__/utils.cpython-310.pyc ADDED
Binary file (21.5 kB). View file
 
ms-swift/swift/llm/model/__pycache__/model_arch.cpython-310.pyc ADDED
Binary file (9.6 kB). View file
 
ms-swift/swift/llm/model/model/__pycache__/gemma.cpython-310.pyc ADDED
Binary file (4.81 kB). View file
 
ms-swift/swift/llm/model/model/__pycache__/glm.cpython-310.pyc ADDED
Binary file (8.51 kB). View file
 
ms-swift/swift/llm/model/model/__pycache__/llama.cpython-310.pyc ADDED
Binary file (10 kB). View file
 
ms-swift/swift/llm/model/model/__pycache__/llava.cpython-310.pyc ADDED
Binary file (7.59 kB). View file
 
ms-swift/swift/llm/model/model/__pycache__/mistral.cpython-310.pyc ADDED
Binary file (3.87 kB). View file
 
ms-swift/swift/llm/model/model/baichuan.py ADDED
@@ -0,0 +1,147 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) Alibaba, Inc. and its affiliates.
2
+ from types import MethodType
3
+ from typing import Any, Dict
4
+
5
+ import torch.nn.functional as F
6
+ from torch import Tensor
7
+ from transformers import AutoConfig
8
+
9
+ from swift.llm import TemplateType
10
+ from swift.utils import get_logger
11
+ from ..constant import LLMModelType
12
+ from ..model_arch import ModelArch
13
+ from ..register import Model, ModelGroup, ModelMeta, get_model_tokenizer_with_flash_attn, register_model
14
+ from ..utils import ModelInfo
15
+
16
+ logger = get_logger()
17
+
18
+
19
def get_model_tokenizer_baichuan(model_dir: str,
                                 model_info: ModelInfo,
                                 model_kwargs: Dict[str, Any],
                                 load_model: bool = True,
                                 **kwargs):
    """Load a Baichuan model/tokenizer and make sure ``get_input_embeddings`` is usable.

    Returns:
        The ``(model, tokenizer)`` pair produced by ``get_model_tokenizer_with_flash_attn``.
    """
    model, tokenizer = get_model_tokenizer_with_flash_attn(model_dir, model_info, model_kwargs, load_model, **kwargs)
    if model is None:
        return model, tokenizer
    # baichuan-13b does not implement the `get_input_embeddings` function
    # fix gradient_checkpointing bug
    try:
        model.get_input_embeddings()
    except NotImplementedError:
        model.__class__.get_input_embeddings = lambda self: self.model.embed_tokens
    return model, tokenizer
33
+
34
+
35
+ register_model(
36
+ ModelMeta(
37
+ LLMModelType.baichuan, [
38
+ ModelGroup([
39
+ Model('baichuan-inc/Baichuan-13B-Chat', 'baichuan-inc/Baichuan-13B-Chat'),
40
+ Model('baichuan-inc/Baichuan-13B-Base', 'baichuan-inc/Baichuan-13B-Base'),
41
+ Model('baichuan-inc/baichuan-7B', 'baichuan-inc/Baichuan-7B'),
42
+ ]),
43
+ ],
44
+ TemplateType.baichuan,
45
+ get_model_tokenizer_baichuan,
46
+ architectures=['BaichuanForCausalLM', 'BaiChuanForCausalLM'],
47
+ model_arch=ModelArch.baichuan,
48
+ requires=['transformers<4.34']))
49
+
50
+
51
def get_model_tokenizer_baichuan_m1(model_dir: str,
                                    model_info: ModelInfo,
                                    model_kwargs: Dict[str, Any],
                                    load_model: bool = True,
                                    **kwargs):
    """Load Baichuan-M1 after patching its rotary embedding to align the dtype of ``q`` with ``k``.

    The patch is applied to the dynamically loaded ``RotaryEmbedding`` class and
    is guarded by an ``_origin_forward`` marker, so repeated calls do not stack
    wrappers on top of one another.

    Returns:
        The ``(model, tokenizer)`` pair from ``get_model_tokenizer_baichuan``.
    """
    from transformers.dynamic_module_utils import get_class_from_dynamic_module
    rotary_embedding = get_class_from_dynamic_module('modeling_baichuan.RotaryEmbedding', model_dir)

    if not hasattr(rotary_embedding, '_origin_forward'):
        _old_forward = rotary_embedding.forward
        # Keep a handle on the unpatched forward; also marks the class as patched.
        rotary_embedding._origin_forward = _old_forward

        def _new_forward(self, q, k, seqlen_offset=None, cu_seqlens=None, max_seqlen=None):
            q = q.to(k.dtype)
            res = _old_forward(self, q, k, seqlen_offset, cu_seqlens, max_seqlen)
            return res

        rotary_embedding.forward = _new_forward

    model, tokenizer = get_model_tokenizer_baichuan(model_dir, model_info, model_kwargs, load_model, **kwargs)
    return model, tokenizer
69
+
70
+
71
+ register_model(
72
+ ModelMeta(
73
+ LLMModelType.baichuan_m1, [
74
+ ModelGroup([
75
+ Model('baichuan-inc/Baichuan-M1-14B-Instruct', 'baichuan-inc/Baichuan-M1-14B-Instruct'),
76
+ ]),
77
+ ],
78
+ TemplateType.baichuan_m1,
79
+ get_model_tokenizer_baichuan_m1,
80
+ architectures=['BaichuanM1ForCausalLM'],
81
+ model_arch=ModelArch.baichuan,
82
+ requires=['transformers>=4.48']))
83
+
84
+
85
def patch_baichuan2_lm_head_forward(self, hidden_states: Tensor) -> Tensor:
    """Replacement ``forward`` for the Baichuan2 lm_head (works around an fp32 bug).

    In training mode the weight is normalized on every call and cast back to the
    weight's dtype. In eval mode the stored weight is normalized in place once
    (tracked through ``self.first_flag``) and reused afterwards.
    """
    # patch: baichuan2 lm_head (fp32 bug)
    if self.training:
        return F.linear(hidden_states, F.normalize(self.weight).to(self.weight.dtype))
    if self.first_flag:
        # First eval call: overwrite the stored weight with its normalized form.
        self.first_flag = False
        self.weight.data = F.normalize(self.weight).to(self.weight.dtype)
    return F.linear(hidden_states, self.weight)
96
+
97
+
98
def get_model_tokenizer_baichuan2(model_dir: str,
                                  model_info: ModelInfo,
                                  model_kwargs: Dict[str, Any],
                                  load_model: bool = True,
                                  model_config=None,
                                  **kwargs):
    """Load a Baichuan2 model/tokenizer and install the patched lm_head forward.

    Args:
        model_config: Optional pre-loaded config; loaded from ``model_dir`` when None.

    Returns:
        ``(model, tokenizer)`` where ``model`` is the object returned by the
        loader (not the inner ``.model`` that may be unwrapped for patching).
    """
    if model_config is None:
        model_config = AutoConfig.from_pretrained(model_dir, trust_remote_code=True)
    if not hasattr(model_config, 'z_loss_weight'):
        model_config.z_loss_weight = 0
    # patch: baichuan2_13b configuration_baichuan.py bug
    if hasattr(model_config, 'gradient_checkpointing'):
        gradient_checkpointing = model_config.gradient_checkpointing
        if isinstance(gradient_checkpointing, (tuple, list)):
            model_config.gradient_checkpointing = gradient_checkpointing[0]
    model, tokenizer = get_model_tokenizer_with_flash_attn(
        model_dir, model_info, model_kwargs, load_model, model_config=model_config, **kwargs)
    model_ori = model
    if model is not None:
        if not hasattr(model, 'lm_head'):  # fix awq
            model = model.model
        lm_head = model.lm_head
        new_forward = MethodType(patch_baichuan2_lm_head_forward, lm_head)
        # The attribute that is written lives on ``lm_head``, so ``lm_head`` is the
        # object that must be tested. Checking the parent model instead could
        # store the patch in an ``_old_forward`` slot that nothing ever calls.
        # NOTE(review): presumably ``_old_forward`` is set by accelerate's
        # device_map hooks — confirm.
        if hasattr(lm_head, '_old_forward'):  # device_map
            lm_head._old_forward = new_forward
        else:
            lm_head.forward = new_forward
    return model_ori, tokenizer
125
+
126
+
127
+ register_model(
128
+ ModelMeta(
129
+ LLMModelType.baichuan2,
130
+ [
131
+ ModelGroup([
132
+ Model('baichuan-inc/Baichuan2-7B-Chat', 'baichuan-inc/Baichuan2-7B-Chat'),
133
+ Model('baichuan-inc/Baichuan2-7B-Base', 'baichuan-inc/Baichuan2-7B-Base'),
134
+ Model('baichuan-inc/Baichuan2-13B-Chat', 'baichuan-inc/Baichuan2-13B-Chat'),
135
+ Model('baichuan-inc/Baichuan2-13B-Base', 'baichuan-inc/Baichuan2-13B-Base'),
136
+ ]),
137
+ ModelGroup([
138
+ Model('baichuan-inc/Baichuan2-7B-Chat-4bits', 'baichuan-inc/Baichuan2-7B-Chat-4bits'),
139
+ Model('baichuan-inc/Baichuan2-13B-Chat-4bits', 'baichuan-inc/Baichuan2-13B-Chat-4bits'),
140
+ ],
141
+ requires=['bitsandbytes<0.41.2', 'accelerate<0.26'])
142
+ ],
143
+ TemplateType.baichuan,
144
+ get_model_tokenizer_baichuan2,
145
+ architectures=['BaichuanForCausalLM', 'BaiChuanForCausalLM'],
146
+ model_arch=ModelArch.baichuan,
147
+ ))
ms-swift/swift/llm/model/model/mamba.py ADDED
@@ -0,0 +1,41 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) Alibaba, Inc. and its affiliates.
2
+ from typing import Any, Dict
3
+
4
+ from swift.llm import TemplateType
5
+ from swift.utils import get_logger
6
+ from ..constant import LLMModelType
7
+ from ..register import Model, ModelGroup, ModelMeta, get_model_tokenizer_from_local, register_model
8
+ from ..utils import ModelInfo
9
+
10
+ logger = get_logger()
11
+
12
+
13
def get_model_tokenizer_mamba(model_dir: str,
                              model_info: ModelInfo,
                              model_kwargs: Dict[str, Any],
                              load_model: bool = True,
                              **kwargs):
    """Load a Mamba model/tokenizer, first logging a reminder about the optional fast kernels.

    Returns:
        The ``(model, tokenizer)`` pair from ``get_model_tokenizer_from_local``.
    """
    # The two string fragments are concatenated, so the first one must end with a
    # space (the message previously read "...willbe really slow!").
    logger.info('[IMPORTANT] Remember installing causal-conv1d>=1.2.0 and mamba-ssm, or your training and inference '
                'will be really slow!')
    return get_model_tokenizer_from_local(model_dir, model_info, model_kwargs, load_model, **kwargs)
21
+
22
+
23
+ register_model(
24
+ ModelMeta(
25
+ LLMModelType.mamba,
26
+ [
27
+ ModelGroup([
28
+ Model('AI-ModelScope/mamba-130m-hf', 'state-spaces/mamba-130m-hf'),
29
+ Model('AI-ModelScope/mamba-370m-hf', 'state-spaces/mamba-370m-hf'),
30
+ Model('AI-ModelScope/mamba-390m-hf', 'state-spaces/mamba-390m-hf'),
31
+ Model('AI-ModelScope/mamba-790m-hf', 'state-spaces/mamba-790m-hf'),
32
+ Model('AI-ModelScope/mamba-1.4b-hf', 'state-spaces/mamba-1.4b-hf'),
33
+ Model('AI-ModelScope/mamba-2.8b-hf', 'state-spaces/mamba-2.8b-hf'),
34
+ ])
35
+ ],
36
+ TemplateType.default,
37
+ get_model_tokenizer_mamba,
38
+ architectures=['MambaForCausalLM'],
39
+ model_arch=None,
40
+ requires=['transformers>=4.39.0'],
41
+ ))
ms-swift/swift/llm/model/model/stepfun.py ADDED
@@ -0,0 +1,86 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) Alibaba, Inc. and its affiliates.
2
+ import os
3
+ import shutil
4
+ import sys
5
+
6
+ from transformers import AutoModel
7
+
8
+ from swift.llm import TemplateType
9
+ from ..constant import MLLMModelType
10
+ from ..model_arch import ModelArch
11
+ from ..register import (Model, ModelGroup, ModelMeta, get_model_tokenizer_multimodal,
12
+ get_model_tokenizer_with_flash_attn, register_model)
13
+ from ..utils import git_clone_github, safe_snapshot_download
14
+
15
+
16
def get_model_tokenizer_got_ocr2(*args, **kwargs):
    """Load GOT-OCR2 through the flash-attn loader, forcing ``AutoModel`` as the model class."""
    loader_kwargs = {**kwargs, 'automodel_class': AutoModel}
    model, tokenizer = get_model_tokenizer_with_flash_attn(*args, **loader_kwargs)
    return model, tokenizer
20
+
21
+
22
+ register_model(
23
+ ModelMeta(
24
+ MLLMModelType.got_ocr2, [
25
+ ModelGroup([
26
+ Model('stepfun-ai/GOT-OCR2_0', 'stepfun-ai/GOT-OCR2_0'),
27
+ ]),
28
+ ],
29
+ TemplateType.got_ocr2,
30
+ get_model_tokenizer_got_ocr2,
31
+ model_arch=ModelArch.got_ocr2,
32
+ architectures=['GOTQwenForCausalLM'],
33
+ tags=['vision']))
34
+
35
+
36
def get_model_tokenizer_got_ocr2_hf(model_dir, *args, **kwargs):
    """Load the transformers-native GOT-OCR2 model and its processor.

    ``GotOcr2VisionLayer`` is registered in the class-level ``_no_split_modules``
    list. The list is shared by every call, so the name is only added when it is
    not already present; otherwise each load would append another duplicate.

    Returns:
        The ``(model, processor)`` pair from ``get_model_tokenizer_multimodal``.
    """
    from transformers.models.got_ocr2 import GotOcr2ForConditionalGeneration
    no_split_modules = GotOcr2ForConditionalGeneration._no_split_modules
    if 'GotOcr2VisionLayer' not in no_split_modules:
        no_split_modules.append('GotOcr2VisionLayer')
    model, processor = get_model_tokenizer_multimodal(model_dir, *args, **kwargs)
    return model, processor
41
+
42
+
43
+ register_model(
44
+ ModelMeta(
45
+ MLLMModelType.got_ocr2_hf, [
46
+ ModelGroup([
47
+ Model('stepfun-ai/GOT-OCR-2.0-hf', 'stepfun-ai/GOT-OCR-2.0-hf'),
48
+ ]),
49
+ ],
50
+ TemplateType.got_ocr2_hf,
51
+ get_model_tokenizer_got_ocr2_hf,
52
+ model_arch=ModelArch.got_ocr2_hf,
53
+ architectures=['GOTQwenForCausalLM'],
54
+ tags=['vision']))
55
+
56
+
57
def get_model_tokenizer_step_audio(*args, **kwargs):
    """Load Step-Audio-Chat and attach the Step-Audio tokenizer as ``model.encoder``.

    The Step-Audio repository (``kwargs['local_repo_path']`` or a fresh clone) is
    put on ``sys.path`` so that its ``tokenizer`` module can be imported.

    Returns:
        The ``(model, tokenizer)`` pair from ``get_model_tokenizer_with_flash_attn``.
    """
    local_repo_path = kwargs.get('local_repo_path')
    if not local_repo_path:
        local_repo_path = git_clone_github('https://github.com/stepfun-ai/Step-Audio.git')
    # Avoid growing sys.path with the same entry on every call.
    if local_repo_path not in sys.path:
        sys.path.append(local_repo_path)
    from tokenizer import StepAudioTokenizer
    encoder_path = safe_snapshot_download('stepfun-ai/Step-Audio-Tokenizer', check_local=True)
    model, tokenizer = get_model_tokenizer_with_flash_attn(*args, **kwargs)
    if model is not None:
        model.encoder = StepAudioTokenizer(encoder_path)
        # from tts import StepAudioTTS
        # if not os.path.exists('speakers'):
        #     shutil.copytree(os.path.join(local_repo_path, 'speakers'), 'speakers')
        # decoder_path = safe_snapshot_download('stepfun-ai/Step-Audio-TTS-3B', check_local=True)
        # model.decoder = StepAudioTTS(decoder_path, model.encoder)
    return model, tokenizer
73
+
74
+
75
+ register_model(
76
+ ModelMeta(
77
+ MLLMModelType.step_audio, [
78
+ ModelGroup([
79
+ Model('stepfun-ai/Step-Audio-Chat', 'stepfun-ai/Step-Audio-Chat'),
80
+ ]),
81
+ ],
82
+ TemplateType.step_audio,
83
+ get_model_tokenizer_step_audio,
84
+ architectures=['Step1ForCausalLM'],
85
+ requires=['funasr', 'sox', 'conformer', 'openai-whisper', 'librosa'],
86
+ tags=['audio']))
ms-swift/swift/llm/model/model/telechat.py ADDED
@@ -0,0 +1,59 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) Alibaba, Inc. and its affiliates.
2
+
3
+ from transformers import GenerationConfig
4
+
5
+ from swift.llm import TemplateType
6
+ from ..constant import LLMModelType
7
+ from ..model_arch import ModelArch
8
+ from ..register import Model, ModelGroup, ModelMeta, get_model_tokenizer_with_flash_attn, register_model
9
+
10
+
11
def get_model_tokenizer_telechat(*args, **kwargs):
    """Load a TeleChat model/tokenizer and copy special token ids onto the tokenizer.

    The ids are read from the ``GenerationConfig`` stored in the model directory.

    Returns:
        The ``(model, tokenizer)`` pair from ``get_model_tokenizer_with_flash_attn``.
    """
    model, tokenizer = get_model_tokenizer_with_flash_attn(*args, **kwargs)
    # The model directory is normally the first positional argument; fall back to
    # the keyword form so a keyword-only call does not fail with IndexError.
    # NOTE(review): assumes the loader's first parameter is named `model_dir` — confirm.
    model_dir = args[0] if args else kwargs['model_dir']
    generation_config = GenerationConfig.from_pretrained(model_dir)
    for k in ['bos_token_id', 'eos_token_id', 'pad_token_id', 'user_token_id', 'bot_token_id']:
        setattr(tokenizer, k, getattr(generation_config, k))
    return model, tokenizer
18
+
19
+
20
+ register_model(
21
+ ModelMeta(
22
+ LLMModelType.telechat,
23
+ [
24
+ ModelGroup([
25
+ Model('TeleAI/TeleChat-7B', 'Tele-AI/telechat-7B'),
26
+ Model('TeleAI/TeleChat-12B', 'Tele-AI/TeleChat-12B'),
27
+ Model('TeleAI/TeleChat-12B-v2', 'Tele-AI/TeleChat-12B-v2'),
28
+ Model('TeleAI/TeleChat-52B', 'TeleAI/TeleChat-52B'),
29
+ ]),
30
+ ModelGroup([
31
+ Model('swift/TeleChat-12B-V2-GPTQ-Int4'),
32
+ ]),
33
+ ModelGroup([
34
+ Model('TeleAI/TeleChat2-35B', 'Tele-AI/TeleChat2-35B'),
35
+ Model('TeleAI/TeleChat2-115B', 'Tele-AI/TeleChat2-115B'),
36
+ ]),
37
+ ],
38
+ TemplateType.telechat,
39
+ get_model_tokenizer_telechat,
40
+ model_arch=ModelArch.telechat,
41
+ architectures=['TelechatForCausalLM', 'TeleChatForCausalLM'],
42
+ ))
43
+
44
+ register_model(
45
+ ModelMeta(
46
+ LLMModelType.telechat2,
47
+ [
48
+ ModelGroup([
49
+ Model('TeleAI/TeleChat2-3B', 'Tele-AI/TeleChat2-3B'),
50
+ Model('TeleAI/TeleChat2-7B-32K', 'Tele-AI/TeleChat2-7B-32K'),
51
+ Model('TeleAI/TeleChat2-35B-32K', 'Tele-AI/TeleChat2-35B-32K'),
52
+ Model('TeleAI/TeleChat2-35B-Nov', 'Tele-AI/TeleChat2-35B-Nov'),
53
+ ]),
54
+ ],
55
+ TemplateType.telechat2,
56
+ get_model_tokenizer_telechat,
57
+ model_arch=ModelArch.telechat,
58
+ architectures=['TeleChat2ForCausalLM'],
59
+ ))
ms-swift/swift/llm/model/model/valley.py ADDED
@@ -0,0 +1,82 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) Alibaba, Inc. and its affiliates.
2
+ import os
3
+ import sys
4
+ from functools import partial, wraps
5
+ from typing import Any, Dict
6
+
7
+ from swift.llm import TemplateType
8
+ from ..constant import MLLMModelType
9
+ from ..model_arch import ModelArch
10
+ from ..register import Model, ModelGroup, ModelMeta, get_model_tokenizer_with_flash_attn, register_model
11
+ from ..utils import ModelInfo, git_clone_github, safe_snapshot_download
12
+
13
+
14
def get_model_tokenizer_valley(model_dir: str,
                               model_info: ModelInfo,
                               model_kwargs: Dict[str, Any],
                               load_model: bool = True,
                               **kwargs):
    """Load Valley-Eagle and attach its image processors to the tokenizer.

    ``kwargs['llm_model_type']`` is required (it is popped before the remaining
    kwargs are forwarded to the loader). The Valley repository
    (``kwargs['local_repo_path']`` or a fresh clone) is put on ``sys.path`` so
    that ``valley_eagle`` can be imported.

    Returns:
        The ``(model, tokenizer)`` pair from ``get_model_tokenizer_with_flash_attn``.
    """
    llm_model_type = kwargs.pop('llm_model_type')
    local_repo_path = kwargs.get('local_repo_path')
    if not local_repo_path:
        repo_path = 'https://github.com/bytedance/Valley.git'
        local_repo_path = git_clone_github(repo_path)
    # Avoid growing sys.path with the same entry on every call.
    if local_repo_path not in sys.path:
        sys.path.append(local_repo_path)

    if llm_model_type == 'valley':
        from transformers.modeling_outputs import CausalLMOutputWithPast
        from valley_eagle.model.language_model.valley_qwen2 import ValleyQwen2ForCausalLM, ValleyConfig
        model_config = ValleyConfig.from_pretrained(model_dir)
        model_config.mm_vision_tower = safe_snapshot_download(
            'AI-ModelScope/siglip-so400m-patch14-384', check_local=True)
        model_config.eagle_vision_tower = safe_snapshot_download('Qwen/Qwen2-VL-7B-Instruct', check_local=True)
        automodel_class = ValleyQwen2ForCausalLM

        # Patch the class only once; `_origin_forward` doubles as the marker.
        if not hasattr(ValleyQwen2ForCausalLM, '_origin_forward'):
            forward = ValleyQwen2ForCausalLM.forward
            ValleyQwen2ForCausalLM._origin_forward = forward

            @wraps(forward)
            def new_forward(*args, **kwargs):
                import torch
                outputs = forward(*args, **kwargs)
                loss = outputs.loss
                # Reduce a non-empty loss over its last dimension.
                if loss is not None and loss.shape[-1] > 0:
                    loss = torch.mean(loss, dim=-1)
                return CausalLMOutputWithPast(
                    loss=loss,
                    logits=outputs.logits,
                    past_key_values=outputs.past_key_values,
                    hidden_states=outputs.hidden_states,
                    attentions=outputs.attentions,
                )

            ValleyQwen2ForCausalLM.forward = new_forward
    # NOTE(review): `model_config` / `automodel_class` are only bound in the
    # 'valley' branch above; any other `llm_model_type` would fail here — confirm
    # that 'valley' is the only supported value.
    kwargs['model_config'] = model_config
    kwargs['automodel_class'] = automodel_class
    model, tokenizer = get_model_tokenizer_with_flash_attn(model_dir, model_info, model_kwargs, load_model, **kwargs)
    if model is not None:
        model.generation_config.repetition_penalty = 1.0  # Otherwise, Error. Same for original code.
        from transformers import AutoProcessor, SiglipImageProcessor
        tokenizer.image_processor = SiglipImageProcessor.from_pretrained(model.config.mm_vision_tower)
        tokenizer.qwen2vl_processor = AutoProcessor.from_pretrained(
            model.config.eagle_vision_tower, max_pixels=1280 * 28 * 28)
        tokenizer.image_processor.crop_size = tokenizer.image_processor.size['height']
    return model, tokenizer
66
+
67
+
68
+ register_model(
69
+ ModelMeta(
70
+ MLLMModelType.valley,
71
+ [
72
+ ModelGroup([
73
+ Model('bytedance-research/Valley-Eagle-7B'),
74
+ ], ),
75
+ ],
76
+ TemplateType.valley,
77
+ partial(get_model_tokenizer_valley, llm_model_type='valley'),
78
+ architectures=['ValleyQwen2ForCausalLM'],
79
+ model_arch=ModelArch.valley,
80
+ requires=['transformers>=4.42', 'av'],
81
+ tags=['vision'],
82
+ ))
ms-swift/swift/llm/model/patcher.py ADDED
@@ -0,0 +1,363 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) Alibaba, Inc. and its affiliates.
2
+ import os
3
+ from contextlib import contextmanager
4
+ from functools import wraps
5
+ from types import MethodType
6
+ from typing import Dict, List, Optional, Union
7
+
8
+ import accelerate
9
+ import torch
10
+ import torch.nn as nn
11
+ import transformers
12
+ from accelerate.utils import find_device
13
+ from packaging import version
14
+ from torch.nn import BCEWithLogitsLoss, CrossEntropyLoss, MSELoss
15
+ from torch.nn.parallel import DistributedDataParallel as DDP
16
+ from transformers import PreTrainedModel, dynamic_module_utils, trainer
17
+ from transformers.modeling_outputs import SequenceClassifierOutputWithPast
18
+
19
+ from swift.llm import to_device, to_float_dtype
20
+ from swift.utils import get_dist_setting, get_logger, is_mp_ddp, safe_ddp_context, use_torchacc
21
+ from swift.utils.torch_utils import _get_max_memory, _sync_max_memory, get_device_count
22
+ from .model_arch import get_model_arch
23
+ from .utils import HfConfigFactory
24
+
25
+ logger = get_logger()
26
+
27
+
28
def patch_fixed_float_dtype(module: torch.nn.Module, dtype):
    """Patch the module so that its forward output is passed through ``to_float_dtype`` with ``dtype``."""

    def _cast_output(_module, _inputs, result):
        # Forward hook: the returned value replaces the module's output.
        return to_float_dtype(result, dtype)

    module.register_forward_hook(_cast_output)
39
+
40
+
41
def patch_fixed_device(module: torch.nn.Module, device):
    """Patch the module so that its forward output is moved to ``device`` via ``to_device``."""

    def _move_output(_module, _inputs, result):
        # Forward hook: the returned value replaces the module's output.
        return to_device(result, device)

    module.register_forward_hook(_move_output)
52
+
53
+
54
def patch_output_clone(module: torch.nn.Module):
    """Register a forward hook that marks the output as requiring grad and returns a clone of it.

    Cloning avoids problems with later in-place operations on the output.
    """

    def _grad_enabled_copy(_module, _inputs, result):
        result.requires_grad_(True)
        return result.clone()

    module.register_forward_hook(_grad_enabled_copy)
61
+
62
+
63
def patch_output_normalizer(module: torch.nn.Module, model_meta):
    """Patch a causal LM so that its forward returns a normalized embedding.

    The first head found among the known head attribute names gets an identity
    forward, so ``outputs.logits`` of the original forward carries whatever is
    fed into the head. The model forward is then replaced by one that picks one
    position per sequence from those values and L2-normalizes it.

    Args:
        module: The model to patch.
        model_meta: Object whose ``model_arch`` is passed to ``get_model_arch``
            to look up the ``language_model`` attribute prefix.

    Raises:
        AssertionError: If none of the known head attributes exists.
    """

    def _identity_head(self, hidden_states):
        return hidden_states

    candidate_heads = ['lm_head', 'output', 'embed_out', 'output_layer']
    arch = get_model_arch(model_meta.model_arch)
    llm_prefix = getattr(arch, 'language_model', None)
    llm_model = getattr(module, llm_prefix[0]) if llm_prefix else module

    # Fall back to the outer module when the sub-model is not a *CausalLM class.
    if 'CausalLM' not in llm_model.__class__.__name__:
        llm_model = module

    head_name = next((name for name in candidate_heads if hasattr(llm_model, name)), None)
    assert head_name is not None, 'Cannot find the proper lm_head name'
    head = getattr(llm_model, head_name)
    head.forward = MethodType(_identity_head, head)

    def forward(self, input_ids: torch.LongTensor = None, attention_mask=None, *args, **kwargs):
        outputs = self.forward_origin(input_ids=input_ids, attention_mask=attention_mask, *args, **kwargs)
        hidden_states = outputs.logits
        # If the last mask column is set for every row, the last position is used for all rows.
        if attention_mask[:, -1].sum() == attention_mask.shape[0]:
            embeddings = hidden_states[:, -1]
        else:
            # Otherwise index each row at (number of attended tokens - 1).
            last_positions = attention_mask.sum(dim=1) - 1
            row_index = torch.arange(hidden_states.shape[0], device=hidden_states.device)
            embeddings = hidden_states[row_index, last_positions]
        embeddings = torch.nn.functional.normalize(embeddings, p=2, dim=1)

        return {
            'last_hidden_state': embeddings.contiguous(),
        }

    llm_model.forward_origin = llm_model.forward
    llm_model.forward = MethodType(forward, llm_model)
106
+
107
+
108
def patch_output_to_input_device(module: torch.nn.Module):
    """Register a forward hook that moves the output onto the input's device.

    The device is looked up with ``find_device`` in the positional arguments
    first and, if that yields nothing truthy, in the keyword arguments.

    Args:
        module: The module to be patched.
    """

    def _move_to_input_device(_module, args, kwargs, output):
        target = find_device(args)
        if not target:
            target = find_device(kwargs)
        return to_device(output, target)

    module.register_forward_hook(_move_to_input_device, with_kwargs=True)
120
+
121
+
122
@contextmanager
def patch_device_map():
    """Temporarily replace ``PreTrainedModel._get_no_split_modules``.

    While the context is active, every ``PreTrainedModel`` sub-module whose
    ``_no_split_modules`` is ``None`` gets an empty list assigned on its class
    before the original method is called. The original method is restored on
    exit; the class-level ``_no_split_modules`` assignments are not undone.
    """
    original = PreTrainedModel._get_no_split_modules

    def _patched_get_no_split_modules(self, device_map: str):
        for submodule in self.modules():
            if not isinstance(submodule, PreTrainedModel):
                continue
            if submodule._no_split_modules is None:
                submodule.__class__._no_split_modules = []
        return original(self, device_map)

    PreTrainedModel._get_no_split_modules = _patched_get_no_split_modules
    try:
        yield
    finally:
        PreTrainedModel._get_no_split_modules = original
137
+
138
+
139
@contextmanager
def patch_ignore_check_imports():
    """Temporarily replace ``transformers.dynamic_module_utils.check_imports``.

    Inside the context, ``check_imports`` just returns
    ``get_relative_imports(filename)``. The original function is restored on
    exit, even if the managed body raises.
    """
    import transformers.dynamic_module_utils as td

    saved_check_imports = td.check_imports

    def _relative_imports_only(filename) -> List[str]:
        return td.get_relative_imports(filename)

    td.check_imports = _relative_imports_only
    try:
        yield
    finally:
        td.check_imports = saved_check_imports
152
+
153
+
154
def _patch_sequence_classification(model, model_meta):
    """Turn a causal LM into a sequence classifier in place.

    A bias-free linear ``score`` head with ``model.config.num_labels`` outputs
    is attached, the first head found among the known head attribute names is
    replaced by ``nn.Identity`` (so ``output.logits`` of the original forward
    carries what used to be the head input), and ``forward`` is wrapped to pool
    one position per sequence and optionally compute a loss.

    Args:
        model: The model instance to patch.
        model_meta: Object whose ``model_arch`` is passed to ``get_model_arch``
            to look up the ``language_model`` attribute prefix.
    """
    hidden_size = HfConfigFactory.get_config_attr(model.config, 'hidden_size')
    initializer_range = HfConfigFactory.get_config_attr(model.config, 'initializer_range')

    lm_heads = ['lm_head', 'output', 'embed_out', 'output_layer']
    llm_prefix = getattr(get_model_arch(model_meta.model_arch), 'language_model', None)
    if llm_prefix:
        llm_model = getattr(model, llm_prefix[0])
    else:
        llm_model = model
    if 'CausalLM' not in llm_model.__class__.__name__:  # fix qwen2_vl
        llm_model = model
    llm_model.num_labels = model.config.num_labels
    llm_model.score = nn.Linear(hidden_size, llm_model.num_labels, bias=False, dtype=llm_model.dtype)
    # A head created on the meta device has no storage; materialize it on CPU before initializing.
    if llm_model.score.weight.device == torch.device('meta'):
        llm_model.score.to_empty(device='cpu')
    llm_model.score.weight.data.normal_(mean=0.0, std=initializer_range)
    for lm_head in lm_heads:
        if hasattr(llm_model, lm_head):
            setattr(llm_model, lm_head, nn.Identity())
            break

    origin_forward = llm_model.forward.__func__

    @wraps(origin_forward)
    def new_forward(self, *args, **kwargs):
        labels = kwargs.pop('labels', None)
        return_dict = kwargs.pop('return_dict', None)
        return_dict = return_dict if return_dict is not None else self.config.use_return_dict
        input_ids = kwargs.get('input_ids')
        inputs_embeds = kwargs.get('inputs_embeds')

        output = origin_forward(self, *args, **kwargs)
        output.logits = output.logits.to(self.score.weight.dtype)
        logits = self.score(output.logits)
        if input_ids is not None:
            batch_size = input_ids.shape[0]
        elif inputs_embeds is not None:
            batch_size = inputs_embeds.shape[0]
        else:
            # Neither tensor was passed by keyword (e.g. positional input_ids):
            # take the batch size from the logits instead of failing on None.
            batch_size = logits.shape[0]

        if self.config.pad_token_id is None and batch_size != 1:
            raise ValueError('Cannot handle batch sizes > 1 if no padding token is defined.')
        if self.config.pad_token_id is None:
            sequence_lengths = -1
        else:
            if input_ids is not None:
                # if no pad token found, use modulo instead of reverse indexing for ONNX compatibility
                sequence_lengths = torch.eq(input_ids, self.config.pad_token_id).int().argmax(-1) - 1
                sequence_lengths = sequence_lengths % input_ids.shape[-1]
                sequence_lengths = sequence_lengths.to(logits.device)
            else:
                sequence_lengths = -1

        pooled_logits = logits[torch.arange(batch_size, device=logits.device), sequence_lengths]

        loss = None
        if labels is not None:
            labels = labels.to(logits.device)
            # Infer the problem type once from num_labels and the label dtype, and cache it on the config.
            if self.config.problem_type is None:
                if self.num_labels == 1:
                    self.config.problem_type = 'regression'
                elif self.num_labels > 1 and (labels.dtype == torch.long or labels.dtype == torch.int):
                    self.config.problem_type = 'single_label_classification'
                else:
                    self.config.problem_type = 'multi_label_classification'

            if self.config.problem_type == 'regression':
                loss_fct = MSELoss()
                if self.num_labels == 1:
                    loss = loss_fct(pooled_logits.squeeze(), labels.squeeze())
                else:
                    loss = loss_fct(pooled_logits, labels)
            elif self.config.problem_type == 'single_label_classification':
                loss_fct = CrossEntropyLoss()
                loss = loss_fct(pooled_logits.view(-1, self.num_labels), labels.view(-1))
            elif self.config.problem_type == 'multi_label_classification':
                loss_fct = BCEWithLogitsLoss()
                loss = loss_fct(pooled_logits, labels)
        if not return_dict:
            output = (pooled_logits, ) + output[1:]
            return ((loss, ) + output) if loss is not None else output

        return SequenceClassifierOutputWithPast(
            loss=loss,
            logits=pooled_logits,
            past_key_values=output.past_key_values,
            hidden_states=output.hidden_states,
            attentions=output.attentions,
        )

    llm_model.forward = MethodType(new_forward, llm_model)
245
+
246
+
247
@contextmanager
def patch_automodel_for_sequence_classification(model_meta):
    """Temporarily make ``PreTrainedModel.from_pretrained`` build classifiers.

    Inside the context, ``from_pretrained`` wraps the target class's
    ``__init__`` so that ``_patch_sequence_classification`` runs right after
    construction. Both the class ``__init__`` and ``from_pretrained`` are
    restored afterwards, also when loading fails.

    Args:
        model_meta: Forwarded to ``_patch_sequence_classification``.
    """
    from_pretrained = PreTrainedModel.from_pretrained.__func__

    @classmethod
    def _new_from_pretrained(cls, *args, **kwargs):
        __init__ = cls.__init__

        def __new_init__(self, *args, **kwargs):
            __init__(self, *args, **kwargs)
            _patch_sequence_classification(self, model_meta)

        if hasattr(cls, '_tp_plan'):  # fix tp_plan
            cls._tp_plan = cls._tp_plan or {}
        cls.__init__ = __new_init__
        try:
            return from_pretrained(cls, *args, **kwargs)
        finally:
            # Restore even if loading raises, so the class is never left patched.
            cls.__init__ = __init__

    PreTrainedModel.from_pretrained = _new_from_pretrained

    try:
        yield
    finally:
        PreTrainedModel.from_pretrained = classmethod(from_pretrained)
272
+
273
+
274
@contextmanager
def patch_automodel(automodel_class, model_info):
    """Temporarily wrap ``PreTrainedModel.from_pretrained`` with loader fixes.

    Inside the context, before delegating to the original ``from_pretrained``:
    ``use_cache`` is dropped from the kwargs when ``automodel_class`` is an
    ``AutoAWQFor*`` class, ``main_input_name`` is forced to ``'input_ids'`` for
    gptq-quantized models, and a falsy ``_tp_plan`` is replaced by ``{}``.

    Args:
        automodel_class: The auto-model class in use; only its ``__name__`` is read.
        model_info: Object providing ``quant_method``.
    """
    original_func = PreTrainedModel.from_pretrained.__func__

    @classmethod
    def _new_from_pretrained(cls, *args, **kwargs):
        uses_awq = 'AutoAWQFor' in automodel_class.__name__
        if uses_awq:
            kwargs.pop('use_cache', None)
        if model_info.quant_method == 'gptq':
            cls.main_input_name = 'input_ids'
        if hasattr(cls, '_tp_plan'):  # fix tp_plan
            cls._tp_plan = cls._tp_plan or {}
        return original_func(cls, *args, **kwargs)

    PreTrainedModel.from_pretrained = _new_from_pretrained

    try:
        yield
    finally:
        PreTrainedModel.from_pretrained = classmethod(original_func)
295
+
296
+
297
# Module-level guard so the MP + DDP patches below are installed at most once per process.
_mp_ddp_patched = False


def patch_mp_ddp():
    """Patch ddp with device_map.

    After patching, the ddp can run with the device_map.
    This should be called before any training starts.

    The first group of patches (device-map inference and DDP ``__init__``) is
    applied only when ``is_mp_ddp()`` is true and only once. The second group
    (``Accelerator`` patches) is applied on every call where ``is_mp_ddp()``
    or ``use_torchacc()`` is true.
    """
    global _mp_ddp_patched
    if is_mp_ddp() and not _mp_ddp_patched:
        _mp_ddp_patched = True
        from accelerate.utils.modeling import get_balanced_memory, infer_auto_device_map

        @wraps(infer_auto_device_map)
        def _infer_auto_device_map_patch(model: nn.Module,
                                         max_memory: Optional[Dict[Union[int, str], Union[int, str]]] = None,
                                         **kwargs) -> Dict[str, Union[int, str, torch.device]]:
            """Replacement for ``infer_auto_device_map`` that supports MP + DDP.

            The ``max_memory`` argument passed by the caller is ignored; it is
            recomputed from the devices assigned to this local rank.
            """
            verbose = kwargs.pop('verbose', False)
            n_gpu = get_device_count()
            _, local_rank, _, local_world_size = get_dist_setting()
            # This rank gets devices local_rank, local_rank + local_world_size, ...
            device_ids = list(range(local_rank, n_gpu, local_world_size))
            max_memory = _get_max_memory(device_ids)
            max_memory = _sync_max_memory(max_memory)
            max_memory = get_balanced_memory(model, max_memory, low_zero=False, **kwargs)
            # Drop devices that were given no memory budget.
            max_memory = {k: v for k, v in max_memory.items() if v > 0}
            return infer_auto_device_map(model, max_memory, verbose=verbose, **kwargs)

        _old_ddp_init = DDP.__init__
        # The replacement __init__ swallows device_ids/output_device and does not forward them.
        accelerate.accelerator.torch.nn.parallel.DistributedDataParallel.__init__ = (
            lambda self, model, device_ids, output_device, *args, **kwargs: _old_ddp_init(self, model, *args, **kwargs))
        # get_balanced_memory becomes a no-op in transformers; balancing happens inside the patched
        # infer_auto_device_map above.
        transformers.modeling_utils.get_balanced_memory = lambda *args, **kwargs: None
        transformers.modeling_utils.infer_auto_device_map = _infer_auto_device_map_patch

    if is_mp_ddp() or use_torchacc():
        _old_accelerator_init = trainer.Accelerator.__init__
        # Default device_placement to False unless the caller sets it explicitly.
        trainer.Accelerator.__init__ = (lambda self, device_placement=False, *args, **kwargs: _old_accelerator_init(
            self, device_placement=device_placement, *args, **kwargs))
        # verify_device_map always reports False after this patch.
        trainer.Accelerator.verify_device_map = lambda *args, **kwargs: False
337
+
338
+
339
@contextmanager
def patch_get_dynamic_module():
    """Temporarily wrap ``dynamic_module_utils.get_cached_module_file``.

    Inside the context, each call runs within
    ``safe_ddp_context(hash_id=str(pretrained_model_name_or_path))``. The
    original function is restored on exit, even if the managed body raises.
    """
    saved_get_cached_module_file = dynamic_module_utils.get_cached_module_file

    def _guarded_get_cached_module_file(pretrained_model_name_or_path, *args, **kwargs):
        hash_id = str(pretrained_model_name_or_path)
        with safe_ddp_context(hash_id=hash_id):
            return saved_get_cached_module_file(pretrained_model_name_or_path, *args, **kwargs)

    dynamic_module_utils.get_cached_module_file = _guarded_get_cached_module_file
    try:
        yield
    finally:
        dynamic_module_utils.get_cached_module_file = saved_get_cached_module_file
352
+
353
+
354
@contextmanager
def patch_tp_plan():
    """Temporarily hide the ``WORLD_SIZE`` environment variable.

    Active only when ``is_mp_ddp()`` is true and transformers is at least 4.50;
    otherwise the context is a no-op. While active, ``WORLD_SIZE`` is removed
    from the environment and its value is mirrored in ``_PATCH_WORLD_SIZE``.
    ``WORLD_SIZE`` is restored on exit, also when the managed body raises;
    ``_PATCH_WORLD_SIZE`` is left set.

    NOTE(review): presumably this keeps transformers from taking its
    tensor-parallel path under MP + DDP - confirm against the caller.
    """
    if not is_mp_ddp() or version.parse(transformers.__version__) < version.parse('4.50'):
        yield
        return
    WORLD_SIZE = os.environ.get('WORLD_SIZE')
    if WORLD_SIZE is None:
        # Nothing to hide; assigning None into os.environ would raise TypeError.
        yield
        return
    os.environ['_PATCH_WORLD_SIZE'] = WORLD_SIZE
    os.environ.pop('WORLD_SIZE')
    try:
        yield
    finally:
        # Always restore, so a failure in the body does not leave WORLD_SIZE missing.
        os.environ['WORLD_SIZE'] = WORLD_SIZE
ms-swift/swift/llm/sampling/distill_sampler.py ADDED
@@ -0,0 +1,148 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from copy import deepcopy
3
+ from typing import List, Optional
4
+
5
+ from openai import OpenAI
6
+
7
+ from swift.llm.infer.protocol import InferRequest, RequestConfig
8
+ from swift.llm.sampling.vanilla_sampler import VanillaSampler
9
+ from .utils import get_messages_md5
10
+
11
+
12
class OpenAI_Engine():
    """Thin inference engine that sends chat requests through the ``OpenAI`` client."""

    def __init__(
        self,
        model: str,
        stream: bool = False,
        base_url: str = 'https://dashscope.aliyuncs.com/compatible-mode/v1',
        api_key: str = '',
        **kwargs,
    ):
        """Create the client.

        Args:
            model: Model name sent with every request.
            stream: Whether completions are requested in streaming mode.
            base_url: Endpoint base URL handed to ``OpenAI``.
            api_key: API key; when empty, the ``OPENAI_API_KEY`` environment
                variable is used instead.
            **kwargs: Extra keyword arguments forwarded to ``OpenAI``.
        """
        self.model = model
        self.stream = stream
        self.client = OpenAI(api_key=api_key if api_key else os.getenv('OPENAI_API_KEY'), base_url=base_url, **kwargs)

    def infer(
        self,
        infer_requests: List[InferRequest],
        request_config: Optional[RequestConfig] = None,
    ):
        """Run the requests one by one and return one response string per request.

        Args:
            infer_requests: Requests; each must provide a ``'messages'`` entry.
            request_config: Sampling settings (``temperature``, ``top_p``,
                ``max_tokens`` are read). Defaults to ``RequestConfig()``.

        Returns:
            A list of strings. When reasoning content is present the string is
            ``<think>...</think>\\n\\n<answer>...</answer>``, otherwise it is the
            plain answer content.

        Raises:
            AssertionError: If a completion has empty answer content.
        """
        if request_config is None:
            # The parameter is declared optional, so do not crash on attribute access below.
            request_config = RequestConfig()

        resp_contents = []
        for infer_request in infer_requests:
            completion = self.client.chat.completions.create(
                model=self.model,
                messages=infer_request['messages'],
                temperature=request_config.temperature,
                top_p=request_config.top_p,
                max_tokens=request_config.max_tokens,
                stream=self.stream,
            )
            # Reset per request so a value from a previous request can never leak in.
            reasoning_content = ''
            content = ''
            if self.stream:
                for chunk in completion:
                    chunk_choices = chunk.choices
                    if not chunk_choices:
                        continue
                    delta = chunk_choices[0].delta
                    reasoning_chunk = getattr(delta, 'reasoning_content', '')
                    answer_chunk = delta.content
                    if reasoning_chunk:
                        reasoning_content += reasoning_chunk
                    elif answer_chunk:
                        content += answer_chunk
            else:
                message = completion.choices[0].message
                # The attribute may be absent or None depending on the response.
                reasoning_content = getattr(message, 'reasoning_content', None) or ''
                content = message.content or ''
            assert len(content) > 0, 'Empty completion'
            if reasoning_content:
                resp_content = f'<think>{reasoning_content}</think>\n\n<answer>{content}</answer>'
            else:
                resp_content = content
            resp_contents.append(resp_content)

        return resp_contents
67
+
68
+
69
class DistillSampler(VanillaSampler):
    """Sampler that collects responses from a remote model via ``OpenAI_Engine``."""

    def __init__(self, *args, **kwargs):
        # NOTE(review): super(VanillaSampler, self) skips VanillaSampler.__init__ and calls the
        # next class in the MRO; presumably to avoid building a local engine - confirm.
        super(VanillaSampler, self).__init__(*args, **kwargs)
        assert self.args.sampler_engine == 'client'
        _Engine = OpenAI_Engine
        self.infer_engine = _Engine(model=self.args.model, stream=self.args.stream, **self.args.engine_kwargs)
        self.infer_engine.strict = False
        self.caches = self.read_cache()

    def _prepare_model_tokenizer(self):
        """No-op: this sampler does not prepare a model or tokenizer."""
        pass

    def _prepare_template(self):
        """No-op: this sampler does not prepare a template."""
        pass

    def extract_choice(self, resp):
        """Format the first choice of ``resp`` as a response string.

        Returns ``<think>...</think>\\n\\n<answer>...</answer>`` when the message
        carries non-empty reasoning content, otherwise the plain content.
        """
        message = resp.choices[0].message
        # Treat a missing and a None/empty attribute alike, as OpenAI_Engine.infer does.
        reasoning_content = getattr(message, 'reasoning_content', None)
        if reasoning_content:
            reps_content = f'<think>{reasoning_content}</think>\n\n<answer>{message.content}</answer>'
        else:
            reps_content = message.content
        return reps_content

    def _cached_choices(self, row):
        """Return the cached choices for ``row`` if the cache holds a complete set, else ``None``."""
        uuid = get_messages_md5(row)
        if uuid in self.caches:
            choices = self.caches[uuid]['choices']
            if len(choices) == self.args.num_return_sequences:
                return choices
        return None

    def generate(self, data):
        """Produce ``num_return_sequences`` choices for every row of ``data``.

        Rows with a complete cache entry reuse the cached choices; all other
        rows are sent to the inference engine. Before sending, the system
        message is set or overridden with ``args.system`` when that is given,
        and a trailing assistant message is dropped.

        Returns:
            A list of row copies, each with a ``'choices'`` list.
        """
        num_sequences = self.args.num_return_sequences
        rows = self.convert_data_to_rows(data)

        infer_requests = []
        for row in rows:
            row = deepcopy(row)
            if self._cached_choices(row) is not None:
                continue
            messages = row['messages']
            if self.args.system:
                if messages and messages[0]['role'] == 'system':
                    messages[0]['content'] = self.args.system
                else:
                    messages.insert(0, {'role': 'system', 'content': self.args.system})
            if messages and messages[-1]['role'] == 'assistant':
                messages = messages[:-1]

            row['messages'] = messages
            infer_requests.extend(deepcopy(row) for _ in range(num_sequences))

        request_config = RequestConfig(
            max_tokens=self.args.max_new_tokens,
            temperature=self.args.temperature,
            top_k=self.args.top_k,
            top_p=self.args.top_p,
        )

        resp_list = []
        if len(infer_requests) > 0:
            resp_list = self.infer_engine.infer(infer_requests, request_config=request_config)

        resp_all = []
        _cur = 0  # index of the next uncached row within resp_list, in units of num_sequences
        for row in rows:
            row = deepcopy(row)
            cached = self._cached_choices(row)
            if cached is not None:
                row['choices'] = cached
                resp_all.append(row)
                continue

            start = num_sequences * _cur
            # Index explicitly so a short engine result raises instead of being silently truncated.
            row['choices'] = [resp_list[j] for j in range(start, start + num_sequences)]
            resp_all.append(row)
            _cur += 1
        return resp_all