HariLogicgo committed on
Commit 40cfce6 · 1 Parent(s): 2de7321

added model and weights

This view is limited to 50 files because it contains too many changes. See raw diff
Files changed (50)
  1. Wan2.2-S2V-14B/.gitattributes +18 -0
  2. Wan2.2-S2V-14B/config.json +44 -0
  3. Wan2.2-S2V-14B/configuration.json +1 -0
  4. Wan2.2-S2V-14B/diffusion_pytorch_model.safetensors.index.json +0 -0
  5. Wan2.2-S2V-14B/google/umt5-xxl/special_tokens_map.json +308 -0
  6. Wan2.2-S2V-14B/google/umt5-xxl/tokenizer_config.json +2748 -0
  7. Wan2.2-S2V-14B/wav2vec2-large-xlsr-53-english/.msc +0 -0
  8. Wan2.2-S2V-14B/wav2vec2-large-xlsr-53-english/.mv +1 -0
  9. Wan2.2-S2V-14B/wav2vec2-large-xlsr-53-english/README.md +165 -0
  10. Wan2.2-S2V-14B/wav2vec2-large-xlsr-53-english/alphabet.json +1 -0
  11. Wan2.2-S2V-14B/wav2vec2-large-xlsr-53-english/config.json +75 -0
  12. Wan2.2-S2V-14B/wav2vec2-large-xlsr-53-english/configuration.json +1 -0
  13. Wan2.2-S2V-14B/wav2vec2-large-xlsr-53-english/eval.py +164 -0
  14. Wan2.2-S2V-14B/wav2vec2-large-xlsr-53-english/full_eval.sh +15 -0
  15. Wan2.2-S2V-14B/wav2vec2-large-xlsr-53-english/language_model/attrs.json +1 -0
  16. Wan2.2-S2V-14B/wav2vec2-large-xlsr-53-english/language_model/unigrams.txt +0 -0
  17. Wan2.2-S2V-14B/wav2vec2-large-xlsr-53-english/log_mozilla-foundation_common_voice_6_0_en_test_predictions.txt +0 -0
  18. Wan2.2-S2V-14B/wav2vec2-large-xlsr-53-english/log_mozilla-foundation_common_voice_6_0_en_test_predictions_greedy.txt +0 -0
  19. Wan2.2-S2V-14B/wav2vec2-large-xlsr-53-english/log_mozilla-foundation_common_voice_6_0_en_test_targets.txt +0 -0
  20. Wan2.2-S2V-14B/wav2vec2-large-xlsr-53-english/log_speech-recognition-community-v2_dev_data_en_validation_predictions.txt +0 -0
  21. Wan2.2-S2V-14B/wav2vec2-large-xlsr-53-english/log_speech-recognition-community-v2_dev_data_en_validation_predictions_greedy.txt +0 -0
  22. Wan2.2-S2V-14B/wav2vec2-large-xlsr-53-english/log_speech-recognition-community-v2_dev_data_en_validation_targets.txt +0 -0
  23. Wan2.2-S2V-14B/wav2vec2-large-xlsr-53-english/mozilla-foundation_common_voice_6_0_en_test_eval_results.txt +2 -0
  24. Wan2.2-S2V-14B/wav2vec2-large-xlsr-53-english/mozilla-foundation_common_voice_6_0_en_test_eval_results_greedy.txt +2 -0
  25. Wan2.2-S2V-14B/wav2vec2-large-xlsr-53-english/preprocessor_config.json +10 -0
  26. Wan2.2-S2V-14B/wav2vec2-large-xlsr-53-english/special_tokens_map.json +1 -0
  27. Wan2.2-S2V-14B/wav2vec2-large-xlsr-53-english/speech-recognition-community-v2_dev_data_en_validation_eval_results.txt +2 -0
  28. Wan2.2-S2V-14B/wav2vec2-large-xlsr-53-english/speech-recognition-community-v2_dev_data_en_validation_eval_results_greedy.txt +2 -0
  29. Wan2.2-S2V-14B/wav2vec2-large-xlsr-53-english/vocab.json +1 -0
  30. Wan2.2/.gitignore +7 -0
  31. Wan2.2/INSTALL.md +55 -0
  32. Wan2.2/Makefile +5 -0
  33. Wan2.2/generate.py +575 -0
  34. Wan2.2/pyproject.toml +66 -0
  35. Wan2.2/requirements.txt +16 -0
  36. Wan2.2/requirements_animate.txt +8 -0
  37. Wan2.2/requirements_s2v.txt +19 -0
  38. Wan2.2/tests/README.md +6 -0
  39. Wan2.2/tests/test.sh +91 -0
  40. Wan2.2/wan/__init__.py +7 -0
  41. Wan2.2/wan/animate.py +648 -0
  42. Wan2.2/wan/configs/__init__.py +50 -0
  43. Wan2.2/wan/configs/shared_config.py +20 -0
  44. Wan2.2/wan/configs/wan_animate_14B.py +40 -0
  45. Wan2.2/wan/configs/wan_i2v_A14B.py +37 -0
  46. Wan2.2/wan/configs/wan_s2v_14B.py +59 -0
  47. Wan2.2/wan/configs/wan_t2v_A14B.py +37 -0
  48. Wan2.2/wan/configs/wan_ti2v_5B.py +36 -0
  49. Wan2.2/wan/distributed/__init__.py +1 -0
  50. Wan2.2/wan/distributed/fsdp.py +45 -0
Wan2.2-S2V-14B/.gitattributes ADDED
@@ -0,0 +1,18 @@
+ . filter=lfs diff=lfs merge=lfs -text
+ Wan2.1_VAE.pth filter=lfs diff=lfs merge=lfs -text
+ diffusion_pytorch_model-00001-of-00004.safetensors filter=lfs diff=lfs merge=lfs -text
+ diffusion_pytorch_model-00002-of-00004.safetensors filter=lfs diff=lfs merge=lfs -text
+ diffusion_pytorch_model-00003-of-00004.safetensors filter=lfs diff=lfs merge=lfs -text
+ diffusion_pytorch_model-00004-of-00004.safetensors filter=lfs diff=lfs merge=lfs -text
+ google/umt5-xxl/spiece.model filter=lfs diff=lfs merge=lfs -text
+ google/umt5-xxl/tokenizer.json filter=lfs diff=lfs merge=lfs -text
+ models_t5_umt5-xxl-enc-bf16.pth filter=lfs diff=lfs merge=lfs -text
+ assets/comp_effic.png filter=lfs diff=lfs merge=lfs -text
+ assets/moe_2.png filter=lfs diff=lfs merge=lfs -text
+ assets/performance.png filter=lfs diff=lfs merge=lfs -text
+ assets/vae.png filter=lfs diff=lfs merge=lfs -text
+ wav2vec2-large-xlsr-53-english/flax_model.msgpack filter=lfs diff=lfs merge=lfs -text
+ wav2vec2-large-xlsr-53-english/language_model/lm.binary filter=lfs diff=lfs merge=lfs -text
+ wav2vec2-large-xlsr-53-english/model.safetensors filter=lfs diff=lfs merge=lfs -text
+ wav2vec2-large-xlsr-53-english/pytorch_model.bin filter=lfs diff=lfs merge=lfs -text
+ assets/471504690-b63bfa58-d5d7-4de6-a1a2-98970b06d9a7.mp4 filter=lfs diff=lfs merge=lfs -text
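Every entry above routes the large binaries (model shards, VAE and T5 checkpoints, tokenizer models, media assets) through Git LFS, so a plain clone only materializes small pointer files. A minimal sketch of fetching the actual files with huggingface_hub is shown below; the repo_id is a hypothetical placeholder and assumes this tree is published as a Hugging Face model repository.

# Minimal sketch: pull the LFS-tracked files declared in .gitattributes.
# "your-namespace/Wan2.2-S2V-14B" is a hypothetical repo id, not confirmed by this commit.
from huggingface_hub import snapshot_download

local_dir = snapshot_download(
    repo_id="your-namespace/Wan2.2-S2V-14B",
    allow_patterns=["*.safetensors", "*.pth", "*.json", "*.model"],
)
print("checkpoint downloaded to:", local_dir)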
Wan2.2-S2V-14B/config.json ADDED
@@ -0,0 +1,44 @@
+ {
+ "__name__": "Config: Transformer config for WanModel_S2V",
+ "_class_name": "WanModel_S2V",
+ "_diffusers_version": "0.34.0",
+ "adain_mode": "attn_norm",
+ "add_last_motion": true,
+ "audio_dim": 1024,
+ "audio_inject_layers": [
+ 0,
+ 4,
+ 8,
+ 12,
+ 16,
+ 20,
+ 24,
+ 27,
+ 30,
+ 33,
+ 36,
+ 39
+ ],
+ "cond_dim": 16,
+ "dim": 5120,
+ "enable_adain": true,
+ "enable_framepack": true,
+ "enable_motioner": false,
+ "enable_tsm": false,
+ "eps": 1e-06,
+ "ffn_dim": 13824,
+ "framepack_drop_mode": "padd",
+ "freq_dim": 256,
+ "in_dim": 16,
+ "model_type": "s2v",
+ "motion_token_num": 1024,
+ "num_audio_token": 4,
+ "num_heads": 40,
+ "num_layers": 40,
+ "out_dim": 16,
+ "text_len": 512,
+ "trainable_token_pos_emb": false,
+ "zero_init": true,
+ "zero_timestep": true
+ }
+
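The config above describes the S2V (speech-to-video) transformer variant: 40 layers, 40 attention heads, hidden size 5120, FFN size 13824, with 1024-dimensional audio features injected at the twelve listed layers. A minimal sketch of reading the file and checking a few of these values is shown below, assuming the working directory is the repository root.

# Minimal sketch: inspect the WanModel_S2V config shipped with the checkpoint.
# Path assumes the current working directory is the repository root.
import json

with open("Wan2.2-S2V-14B/config.json") as f:
    cfg = json.load(f)

assert cfg["model_type"] == "s2v"
print(f'{cfg["num_layers"]} layers, {cfg["num_heads"]} heads, dim {cfg["dim"]}, ffn {cfg["ffn_dim"]}')
print("audio injected at layers:", cfg["audio_inject_layers"])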
Wan2.2-S2V-14B/configuration.json ADDED
@@ -0,0 +1 @@
+ {"framework":"Pytorch","task":"any-to-any"}
Wan2.2-S2V-14B/diffusion_pytorch_model.safetensors.index.json ADDED
The diff for this file is too large to render. See raw diff
 
Wan2.2-S2V-14B/google/umt5-xxl/special_tokens_map.json ADDED
@@ -0,0 +1,308 @@
+ {
+ "additional_special_tokens": [
+ "<extra_id_0>",
+ "<extra_id_1>",
+ "<extra_id_2>",
+ "<extra_id_3>",
+ "<extra_id_4>",
+ "<extra_id_5>",
+ "<extra_id_6>",
+ "<extra_id_7>",
+ "<extra_id_8>",
+ "<extra_id_9>",
+ "<extra_id_10>",
+ "<extra_id_11>",
+ "<extra_id_12>",
+ "<extra_id_13>",
+ "<extra_id_14>",
+ "<extra_id_15>",
+ "<extra_id_16>",
+ "<extra_id_17>",
+ "<extra_id_18>",
+ "<extra_id_19>",
+ "<extra_id_20>",
+ "<extra_id_21>",
+ "<extra_id_22>",
+ "<extra_id_23>",
+ "<extra_id_24>",
+ "<extra_id_25>",
+ "<extra_id_26>",
+ "<extra_id_27>",
+ "<extra_id_28>",
+ "<extra_id_29>",
+ "<extra_id_30>",
+ "<extra_id_31>",
+ "<extra_id_32>",
+ "<extra_id_33>",
+ "<extra_id_34>",
+ "<extra_id_35>",
+ "<extra_id_36>",
+ "<extra_id_37>",
+ "<extra_id_38>",
+ "<extra_id_39>",
+ "<extra_id_40>",
+ "<extra_id_41>",
+ "<extra_id_42>",
+ "<extra_id_43>",
+ "<extra_id_44>",
+ "<extra_id_45>",
+ "<extra_id_46>",
+ "<extra_id_47>",
+ "<extra_id_48>",
+ "<extra_id_49>",
+ "<extra_id_50>",
+ "<extra_id_51>",
+ "<extra_id_52>",
+ "<extra_id_53>",
+ "<extra_id_54>",
+ "<extra_id_55>",
+ "<extra_id_56>",
+ "<extra_id_57>",
+ "<extra_id_58>",
+ "<extra_id_59>",
+ "<extra_id_60>",
+ "<extra_id_61>",
+ "<extra_id_62>",
+ "<extra_id_63>",
+ "<extra_id_64>",
+ "<extra_id_65>",
+ "<extra_id_66>",
+ "<extra_id_67>",
+ "<extra_id_68>",
+ "<extra_id_69>",
+ "<extra_id_70>",
+ "<extra_id_71>",
+ "<extra_id_72>",
+ "<extra_id_73>",
+ "<extra_id_74>",
+ "<extra_id_75>",
+ "<extra_id_76>",
+ "<extra_id_77>",
+ "<extra_id_78>",
+ "<extra_id_79>",
+ "<extra_id_80>",
+ "<extra_id_81>",
+ "<extra_id_82>",
+ "<extra_id_83>",
+ "<extra_id_84>",
+ "<extra_id_85>",
+ "<extra_id_86>",
+ "<extra_id_87>",
+ "<extra_id_88>",
+ "<extra_id_89>",
+ "<extra_id_90>",
+ "<extra_id_91>",
+ "<extra_id_92>",
+ "<extra_id_93>",
+ "<extra_id_94>",
+ "<extra_id_95>",
+ "<extra_id_96>",
+ "<extra_id_97>",
+ "<extra_id_98>",
+ "<extra_id_99>",
+ "<extra_id_100>",
+ "<extra_id_101>",
+ "<extra_id_102>",
+ "<extra_id_103>",
+ "<extra_id_104>",
+ "<extra_id_105>",
+ "<extra_id_106>",
+ "<extra_id_107>",
+ "<extra_id_108>",
+ "<extra_id_109>",
+ "<extra_id_110>",
+ "<extra_id_111>",
+ "<extra_id_112>",
+ "<extra_id_113>",
+ "<extra_id_114>",
+ "<extra_id_115>",
+ "<extra_id_116>",
+ "<extra_id_117>",
+ "<extra_id_118>",
+ "<extra_id_119>",
+ "<extra_id_120>",
+ "<extra_id_121>",
+ "<extra_id_122>",
+ "<extra_id_123>",
+ "<extra_id_124>",
+ "<extra_id_125>",
+ "<extra_id_126>",
+ "<extra_id_127>",
+ "<extra_id_128>",
+ "<extra_id_129>",
+ "<extra_id_130>",
+ "<extra_id_131>",
+ "<extra_id_132>",
+ "<extra_id_133>",
+ "<extra_id_134>",
+ "<extra_id_135>",
+ "<extra_id_136>",
+ "<extra_id_137>",
+ "<extra_id_138>",
+ "<extra_id_139>",
+ "<extra_id_140>",
+ "<extra_id_141>",
+ "<extra_id_142>",
+ "<extra_id_143>",
+ "<extra_id_144>",
+ "<extra_id_145>",
+ "<extra_id_146>",
+ "<extra_id_147>",
+ "<extra_id_148>",
+ "<extra_id_149>",
+ "<extra_id_150>",
+ "<extra_id_151>",
+ "<extra_id_152>",
+ "<extra_id_153>",
+ "<extra_id_154>",
+ "<extra_id_155>",
+ "<extra_id_156>",
+ "<extra_id_157>",
+ "<extra_id_158>",
+ "<extra_id_159>",
+ "<extra_id_160>",
+ "<extra_id_161>",
+ "<extra_id_162>",
+ "<extra_id_163>",
+ "<extra_id_164>",
+ "<extra_id_165>",
+ "<extra_id_166>",
+ "<extra_id_167>",
+ "<extra_id_168>",
+ "<extra_id_169>",
+ "<extra_id_170>",
+ "<extra_id_171>",
+ "<extra_id_172>",
+ "<extra_id_173>",
+ "<extra_id_174>",
+ "<extra_id_175>",
+ "<extra_id_176>",
+ "<extra_id_177>",
+ "<extra_id_178>",
+ "<extra_id_179>",
+ "<extra_id_180>",
+ "<extra_id_181>",
+ "<extra_id_182>",
+ "<extra_id_183>",
+ "<extra_id_184>",
+ "<extra_id_185>",
+ "<extra_id_186>",
+ "<extra_id_187>",
+ "<extra_id_188>",
+ "<extra_id_189>",
+ "<extra_id_190>",
+ "<extra_id_191>",
+ "<extra_id_192>",
+ "<extra_id_193>",
+ "<extra_id_194>",
+ "<extra_id_195>",
+ "<extra_id_196>",
+ "<extra_id_197>",
+ "<extra_id_198>",
+ "<extra_id_199>",
+ "<extra_id_200>",
+ "<extra_id_201>",
+ "<extra_id_202>",
+ "<extra_id_203>",
+ "<extra_id_204>",
+ "<extra_id_205>",
+ "<extra_id_206>",
+ "<extra_id_207>",
+ "<extra_id_208>",
+ "<extra_id_209>",
+ "<extra_id_210>",
+ "<extra_id_211>",
+ "<extra_id_212>",
+ "<extra_id_213>",
+ "<extra_id_214>",
+ "<extra_id_215>",
+ "<extra_id_216>",
+ "<extra_id_217>",
+ "<extra_id_218>",
+ "<extra_id_219>",
+ "<extra_id_220>",
+ "<extra_id_221>",
+ "<extra_id_222>",
+ "<extra_id_223>",
+ "<extra_id_224>",
+ "<extra_id_225>",
+ "<extra_id_226>",
+ "<extra_id_227>",
+ "<extra_id_228>",
+ "<extra_id_229>",
+ "<extra_id_230>",
+ "<extra_id_231>",
+ "<extra_id_232>",
+ "<extra_id_233>",
+ "<extra_id_234>",
+ "<extra_id_235>",
+ "<extra_id_236>",
+ "<extra_id_237>",
+ "<extra_id_238>",
+ "<extra_id_239>",
+ "<extra_id_240>",
+ "<extra_id_241>",
+ "<extra_id_242>",
+ "<extra_id_243>",
+ "<extra_id_244>",
+ "<extra_id_245>",
+ "<extra_id_246>",
+ "<extra_id_247>",
+ "<extra_id_248>",
+ "<extra_id_249>",
+ "<extra_id_250>",
+ "<extra_id_251>",
+ "<extra_id_252>",
+ "<extra_id_253>",
+ "<extra_id_254>",
+ "<extra_id_255>",
+ "<extra_id_256>",
+ "<extra_id_257>",
+ "<extra_id_258>",
+ "<extra_id_259>",
+ "<extra_id_260>",
+ "<extra_id_261>",
+ "<extra_id_262>",
+ "<extra_id_263>",
+ "<extra_id_264>",
+ "<extra_id_265>",
+ "<extra_id_266>",
+ "<extra_id_267>",
+ "<extra_id_268>",
+ "<extra_id_269>",
+ "<extra_id_270>",
+ "<extra_id_271>",
+ "<extra_id_272>",
+ "<extra_id_273>",
+ "<extra_id_274>",
+ "<extra_id_275>",
+ "<extra_id_276>",
+ "<extra_id_277>",
+ "<extra_id_278>",
+ "<extra_id_279>",
+ "<extra_id_280>",
+ "<extra_id_281>",
+ "<extra_id_282>",
+ "<extra_id_283>",
+ "<extra_id_284>",
+ "<extra_id_285>",
+ "<extra_id_286>",
+ "<extra_id_287>",
+ "<extra_id_288>",
+ "<extra_id_289>",
+ "<extra_id_290>",
+ "<extra_id_291>",
+ "<extra_id_292>",
+ "<extra_id_293>",
+ "<extra_id_294>",
+ "<extra_id_295>",
+ "<extra_id_296>",
+ "<extra_id_297>",
+ "<extra_id_298>",
+ "<extra_id_299>"
+ ],
+ "bos_token": "<s>",
+ "eos_token": "</s>",
+ "pad_token": "<pad>",
+ "unk_token": "<unk>"
+ }
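special_tokens_map.json declares the 300 <extra_id_*> sentinel tokens plus the pad/eos/bos/unk tokens for the bundled umt5-xxl text-encoder tokenizer (the SentencePiece model and tokenizer.json themselves are LFS-tracked, see .gitattributes above). A minimal sketch of loading the tokenizer with transformers and checking that the special tokens resolve is shown below; it assumes the transformers package is installed and the LFS files have been pulled.

# Minimal sketch: load the bundled umt5-xxl tokenizer and check its special tokens.
# Assumes `transformers` is installed and the LFS-tracked tokenizer files are present locally.
from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("Wan2.2-S2V-14B/google/umt5-xxl")
print("pad / eos / unk:", tok.pad_token, tok.eos_token, tok.unk_token)
print("<extra_id_0> id:", tok.convert_tokens_to_ids("<extra_id_0>"))
print(tok("A person speaking to the camera").input_ids)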
Wan2.2-S2V-14B/google/umt5-xxl/tokenizer_config.json ADDED
@@ -0,0 +1,2748 @@
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "<pad>",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "1": {
12
+ "content": "</s>",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "2": {
20
+ "content": "<s>",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "3": {
28
+ "content": "<unk>",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "256000": {
36
+ "content": "<extra_id_299>",
37
+ "lstrip": false,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ },
43
+ "256001": {
44
+ "content": "<extra_id_298>",
45
+ "lstrip": false,
46
+ "normalized": false,
47
+ "rstrip": false,
48
+ "single_word": false,
49
+ "special": true
50
+ },
51
+ "256002": {
52
+ "content": "<extra_id_297>",
53
+ "lstrip": false,
54
+ "normalized": false,
55
+ "rstrip": false,
56
+ "single_word": false,
57
+ "special": true
58
+ },
59
+ "256003": {
60
+ "content": "<extra_id_296>",
61
+ "lstrip": false,
62
+ "normalized": false,
63
+ "rstrip": false,
64
+ "single_word": false,
65
+ "special": true
66
+ },
67
+ "256004": {
68
+ "content": "<extra_id_295>",
69
+ "lstrip": false,
70
+ "normalized": false,
71
+ "rstrip": false,
72
+ "single_word": false,
73
+ "special": true
74
+ },
75
+ "256005": {
76
+ "content": "<extra_id_294>",
77
+ "lstrip": false,
78
+ "normalized": false,
79
+ "rstrip": false,
80
+ "single_word": false,
81
+ "special": true
82
+ },
83
+ "256006": {
84
+ "content": "<extra_id_293>",
85
+ "lstrip": false,
86
+ "normalized": false,
87
+ "rstrip": false,
88
+ "single_word": false,
89
+ "special": true
90
+ },
91
+ "256007": {
92
+ "content": "<extra_id_292>",
93
+ "lstrip": false,
94
+ "normalized": false,
95
+ "rstrip": false,
96
+ "single_word": false,
97
+ "special": true
98
+ },
99
+ "256008": {
100
+ "content": "<extra_id_291>",
101
+ "lstrip": false,
102
+ "normalized": false,
103
+ "rstrip": false,
104
+ "single_word": false,
105
+ "special": true
106
+ },
107
+ "256009": {
108
+ "content": "<extra_id_290>",
109
+ "lstrip": false,
110
+ "normalized": false,
111
+ "rstrip": false,
112
+ "single_word": false,
113
+ "special": true
114
+ },
115
+ "256010": {
116
+ "content": "<extra_id_289>",
117
+ "lstrip": false,
118
+ "normalized": false,
119
+ "rstrip": false,
120
+ "single_word": false,
121
+ "special": true
122
+ },
123
+ "256011": {
124
+ "content": "<extra_id_288>",
125
+ "lstrip": false,
126
+ "normalized": false,
127
+ "rstrip": false,
128
+ "single_word": false,
129
+ "special": true
130
+ },
131
+ "256012": {
132
+ "content": "<extra_id_287>",
133
+ "lstrip": false,
134
+ "normalized": false,
135
+ "rstrip": false,
136
+ "single_word": false,
137
+ "special": true
138
+ },
139
+ "256013": {
140
+ "content": "<extra_id_286>",
141
+ "lstrip": false,
142
+ "normalized": false,
143
+ "rstrip": false,
144
+ "single_word": false,
145
+ "special": true
146
+ },
147
+ "256014": {
148
+ "content": "<extra_id_285>",
149
+ "lstrip": false,
150
+ "normalized": false,
151
+ "rstrip": false,
152
+ "single_word": false,
153
+ "special": true
154
+ },
155
+ "256015": {
156
+ "content": "<extra_id_284>",
157
+ "lstrip": false,
158
+ "normalized": false,
159
+ "rstrip": false,
160
+ "single_word": false,
161
+ "special": true
162
+ },
163
+ "256016": {
164
+ "content": "<extra_id_283>",
165
+ "lstrip": false,
166
+ "normalized": false,
167
+ "rstrip": false,
168
+ "single_word": false,
169
+ "special": true
170
+ },
171
+ "256017": {
172
+ "content": "<extra_id_282>",
173
+ "lstrip": false,
174
+ "normalized": false,
175
+ "rstrip": false,
176
+ "single_word": false,
177
+ "special": true
178
+ },
179
+ "256018": {
180
+ "content": "<extra_id_281>",
181
+ "lstrip": false,
182
+ "normalized": false,
183
+ "rstrip": false,
184
+ "single_word": false,
185
+ "special": true
186
+ },
187
+ "256019": {
188
+ "content": "<extra_id_280>",
189
+ "lstrip": false,
190
+ "normalized": false,
191
+ "rstrip": false,
192
+ "single_word": false,
193
+ "special": true
194
+ },
195
+ "256020": {
196
+ "content": "<extra_id_279>",
197
+ "lstrip": false,
198
+ "normalized": false,
199
+ "rstrip": false,
200
+ "single_word": false,
201
+ "special": true
202
+ },
203
+ "256021": {
204
+ "content": "<extra_id_278>",
205
+ "lstrip": false,
206
+ "normalized": false,
207
+ "rstrip": false,
208
+ "single_word": false,
209
+ "special": true
210
+ },
211
+ "256022": {
212
+ "content": "<extra_id_277>",
213
+ "lstrip": false,
214
+ "normalized": false,
215
+ "rstrip": false,
216
+ "single_word": false,
217
+ "special": true
218
+ },
219
+ "256023": {
220
+ "content": "<extra_id_276>",
221
+ "lstrip": false,
222
+ "normalized": false,
223
+ "rstrip": false,
224
+ "single_word": false,
225
+ "special": true
226
+ },
227
+ "256024": {
228
+ "content": "<extra_id_275>",
229
+ "lstrip": false,
230
+ "normalized": false,
231
+ "rstrip": false,
232
+ "single_word": false,
233
+ "special": true
234
+ },
235
+ "256025": {
236
+ "content": "<extra_id_274>",
237
+ "lstrip": false,
238
+ "normalized": false,
239
+ "rstrip": false,
240
+ "single_word": false,
241
+ "special": true
242
+ },
243
+ "256026": {
244
+ "content": "<extra_id_273>",
245
+ "lstrip": false,
246
+ "normalized": false,
247
+ "rstrip": false,
248
+ "single_word": false,
249
+ "special": true
250
+ },
251
+ "256027": {
252
+ "content": "<extra_id_272>",
253
+ "lstrip": false,
254
+ "normalized": false,
255
+ "rstrip": false,
256
+ "single_word": false,
257
+ "special": true
258
+ },
259
+ "256028": {
260
+ "content": "<extra_id_271>",
261
+ "lstrip": false,
262
+ "normalized": false,
263
+ "rstrip": false,
264
+ "single_word": false,
265
+ "special": true
266
+ },
267
+ "256029": {
268
+ "content": "<extra_id_270>",
269
+ "lstrip": false,
270
+ "normalized": false,
271
+ "rstrip": false,
272
+ "single_word": false,
273
+ "special": true
274
+ },
275
+ "256030": {
276
+ "content": "<extra_id_269>",
277
+ "lstrip": false,
278
+ "normalized": false,
279
+ "rstrip": false,
280
+ "single_word": false,
281
+ "special": true
282
+ },
283
+ "256031": {
284
+ "content": "<extra_id_268>",
285
+ "lstrip": false,
286
+ "normalized": false,
287
+ "rstrip": false,
288
+ "single_word": false,
289
+ "special": true
290
+ },
291
+ "256032": {
292
+ "content": "<extra_id_267>",
293
+ "lstrip": false,
294
+ "normalized": false,
295
+ "rstrip": false,
296
+ "single_word": false,
297
+ "special": true
298
+ },
299
+ "256033": {
300
+ "content": "<extra_id_266>",
301
+ "lstrip": false,
302
+ "normalized": false,
303
+ "rstrip": false,
304
+ "single_word": false,
305
+ "special": true
306
+ },
307
+ "256034": {
308
+ "content": "<extra_id_265>",
309
+ "lstrip": false,
310
+ "normalized": false,
311
+ "rstrip": false,
312
+ "single_word": false,
313
+ "special": true
314
+ },
315
+ "256035": {
316
+ "content": "<extra_id_264>",
317
+ "lstrip": false,
318
+ "normalized": false,
319
+ "rstrip": false,
320
+ "single_word": false,
321
+ "special": true
322
+ },
323
+ "256036": {
324
+ "content": "<extra_id_263>",
325
+ "lstrip": false,
326
+ "normalized": false,
327
+ "rstrip": false,
328
+ "single_word": false,
329
+ "special": true
330
+ },
331
+ "256037": {
332
+ "content": "<extra_id_262>",
333
+ "lstrip": false,
334
+ "normalized": false,
335
+ "rstrip": false,
336
+ "single_word": false,
337
+ "special": true
338
+ },
339
+ "256038": {
340
+ "content": "<extra_id_261>",
341
+ "lstrip": false,
342
+ "normalized": false,
343
+ "rstrip": false,
344
+ "single_word": false,
345
+ "special": true
346
+ },
347
+ "256039": {
348
+ "content": "<extra_id_260>",
349
+ "lstrip": false,
350
+ "normalized": false,
351
+ "rstrip": false,
352
+ "single_word": false,
353
+ "special": true
354
+ },
355
+ "256040": {
356
+ "content": "<extra_id_259>",
357
+ "lstrip": false,
358
+ "normalized": false,
359
+ "rstrip": false,
360
+ "single_word": false,
361
+ "special": true
362
+ },
363
+ "256041": {
364
+ "content": "<extra_id_258>",
365
+ "lstrip": false,
366
+ "normalized": false,
367
+ "rstrip": false,
368
+ "single_word": false,
369
+ "special": true
370
+ },
371
+ "256042": {
372
+ "content": "<extra_id_257>",
373
+ "lstrip": false,
374
+ "normalized": false,
375
+ "rstrip": false,
376
+ "single_word": false,
377
+ "special": true
378
+ },
379
+ "256043": {
380
+ "content": "<extra_id_256>",
381
+ "lstrip": false,
382
+ "normalized": false,
383
+ "rstrip": false,
384
+ "single_word": false,
385
+ "special": true
386
+ },
387
+ "256044": {
388
+ "content": "<extra_id_255>",
389
+ "lstrip": false,
390
+ "normalized": false,
391
+ "rstrip": false,
392
+ "single_word": false,
393
+ "special": true
394
+ },
395
+ "256045": {
396
+ "content": "<extra_id_254>",
397
+ "lstrip": false,
398
+ "normalized": false,
399
+ "rstrip": false,
400
+ "single_word": false,
401
+ "special": true
402
+ },
403
+ "256046": {
404
+ "content": "<extra_id_253>",
405
+ "lstrip": false,
406
+ "normalized": false,
407
+ "rstrip": false,
408
+ "single_word": false,
409
+ "special": true
410
+ },
411
+ "256047": {
412
+ "content": "<extra_id_252>",
413
+ "lstrip": false,
414
+ "normalized": false,
415
+ "rstrip": false,
416
+ "single_word": false,
417
+ "special": true
418
+ },
419
+ "256048": {
420
+ "content": "<extra_id_251>",
421
+ "lstrip": false,
422
+ "normalized": false,
423
+ "rstrip": false,
424
+ "single_word": false,
425
+ "special": true
426
+ },
427
+ "256049": {
428
+ "content": "<extra_id_250>",
429
+ "lstrip": false,
430
+ "normalized": false,
431
+ "rstrip": false,
432
+ "single_word": false,
433
+ "special": true
434
+ },
435
+ "256050": {
436
+ "content": "<extra_id_249>",
437
+ "lstrip": false,
438
+ "normalized": false,
439
+ "rstrip": false,
440
+ "single_word": false,
441
+ "special": true
442
+ },
443
+ "256051": {
444
+ "content": "<extra_id_248>",
445
+ "lstrip": false,
446
+ "normalized": false,
447
+ "rstrip": false,
448
+ "single_word": false,
449
+ "special": true
450
+ },
451
+ "256052": {
452
+ "content": "<extra_id_247>",
453
+ "lstrip": false,
454
+ "normalized": false,
455
+ "rstrip": false,
456
+ "single_word": false,
457
+ "special": true
458
+ },
459
+ "256053": {
460
+ "content": "<extra_id_246>",
461
+ "lstrip": false,
462
+ "normalized": false,
463
+ "rstrip": false,
464
+ "single_word": false,
465
+ "special": true
466
+ },
467
+ "256054": {
468
+ "content": "<extra_id_245>",
469
+ "lstrip": false,
470
+ "normalized": false,
471
+ "rstrip": false,
472
+ "single_word": false,
473
+ "special": true
474
+ },
475
+ "256055": {
476
+ "content": "<extra_id_244>",
477
+ "lstrip": false,
478
+ "normalized": false,
479
+ "rstrip": false,
480
+ "single_word": false,
481
+ "special": true
482
+ },
483
+ "256056": {
484
+ "content": "<extra_id_243>",
485
+ "lstrip": false,
486
+ "normalized": false,
487
+ "rstrip": false,
488
+ "single_word": false,
489
+ "special": true
490
+ },
491
+ "256057": {
492
+ "content": "<extra_id_242>",
493
+ "lstrip": false,
494
+ "normalized": false,
495
+ "rstrip": false,
496
+ "single_word": false,
497
+ "special": true
498
+ },
499
+ "256058": {
500
+ "content": "<extra_id_241>",
501
+ "lstrip": false,
502
+ "normalized": false,
503
+ "rstrip": false,
504
+ "single_word": false,
505
+ "special": true
506
+ },
507
+ "256059": {
508
+ "content": "<extra_id_240>",
509
+ "lstrip": false,
510
+ "normalized": false,
511
+ "rstrip": false,
512
+ "single_word": false,
513
+ "special": true
514
+ },
515
+ "256060": {
516
+ "content": "<extra_id_239>",
517
+ "lstrip": false,
518
+ "normalized": false,
519
+ "rstrip": false,
520
+ "single_word": false,
521
+ "special": true
522
+ },
523
+ "256061": {
524
+ "content": "<extra_id_238>",
525
+ "lstrip": false,
526
+ "normalized": false,
527
+ "rstrip": false,
528
+ "single_word": false,
529
+ "special": true
530
+ },
531
+ "256062": {
532
+ "content": "<extra_id_237>",
533
+ "lstrip": false,
534
+ "normalized": false,
535
+ "rstrip": false,
536
+ "single_word": false,
537
+ "special": true
538
+ },
539
+ "256063": {
540
+ "content": "<extra_id_236>",
541
+ "lstrip": false,
542
+ "normalized": false,
543
+ "rstrip": false,
544
+ "single_word": false,
545
+ "special": true
546
+ },
547
+ "256064": {
548
+ "content": "<extra_id_235>",
549
+ "lstrip": false,
550
+ "normalized": false,
551
+ "rstrip": false,
552
+ "single_word": false,
553
+ "special": true
554
+ },
555
+ "256065": {
556
+ "content": "<extra_id_234>",
557
+ "lstrip": false,
558
+ "normalized": false,
559
+ "rstrip": false,
560
+ "single_word": false,
561
+ "special": true
562
+ },
563
+ "256066": {
564
+ "content": "<extra_id_233>",
565
+ "lstrip": false,
566
+ "normalized": false,
567
+ "rstrip": false,
568
+ "single_word": false,
569
+ "special": true
570
+ },
571
+ "256067": {
572
+ "content": "<extra_id_232>",
573
+ "lstrip": false,
574
+ "normalized": false,
575
+ "rstrip": false,
576
+ "single_word": false,
577
+ "special": true
578
+ },
579
+ "256068": {
580
+ "content": "<extra_id_231>",
581
+ "lstrip": false,
582
+ "normalized": false,
583
+ "rstrip": false,
584
+ "single_word": false,
585
+ "special": true
586
+ },
587
+ "256069": {
588
+ "content": "<extra_id_230>",
589
+ "lstrip": false,
590
+ "normalized": false,
591
+ "rstrip": false,
592
+ "single_word": false,
593
+ "special": true
594
+ },
595
+ "256070": {
596
+ "content": "<extra_id_229>",
597
+ "lstrip": false,
598
+ "normalized": false,
599
+ "rstrip": false,
600
+ "single_word": false,
601
+ "special": true
602
+ },
603
+ "256071": {
604
+ "content": "<extra_id_228>",
605
+ "lstrip": false,
606
+ "normalized": false,
607
+ "rstrip": false,
608
+ "single_word": false,
609
+ "special": true
610
+ },
611
+ "256072": {
612
+ "content": "<extra_id_227>",
613
+ "lstrip": false,
614
+ "normalized": false,
615
+ "rstrip": false,
616
+ "single_word": false,
617
+ "special": true
618
+ },
619
+ "256073": {
620
+ "content": "<extra_id_226>",
621
+ "lstrip": false,
622
+ "normalized": false,
623
+ "rstrip": false,
624
+ "single_word": false,
625
+ "special": true
626
+ },
627
+ "256074": {
628
+ "content": "<extra_id_225>",
629
+ "lstrip": false,
630
+ "normalized": false,
631
+ "rstrip": false,
632
+ "single_word": false,
633
+ "special": true
634
+ },
635
+ "256075": {
636
+ "content": "<extra_id_224>",
637
+ "lstrip": false,
638
+ "normalized": false,
639
+ "rstrip": false,
640
+ "single_word": false,
641
+ "special": true
642
+ },
643
+ "256076": {
644
+ "content": "<extra_id_223>",
645
+ "lstrip": false,
646
+ "normalized": false,
647
+ "rstrip": false,
648
+ "single_word": false,
649
+ "special": true
650
+ },
651
+ "256077": {
652
+ "content": "<extra_id_222>",
653
+ "lstrip": false,
654
+ "normalized": false,
655
+ "rstrip": false,
656
+ "single_word": false,
657
+ "special": true
658
+ },
659
+ "256078": {
660
+ "content": "<extra_id_221>",
661
+ "lstrip": false,
662
+ "normalized": false,
663
+ "rstrip": false,
664
+ "single_word": false,
665
+ "special": true
666
+ },
667
+ "256079": {
668
+ "content": "<extra_id_220>",
669
+ "lstrip": false,
670
+ "normalized": false,
671
+ "rstrip": false,
672
+ "single_word": false,
673
+ "special": true
674
+ },
675
+ "256080": {
676
+ "content": "<extra_id_219>",
677
+ "lstrip": false,
678
+ "normalized": false,
679
+ "rstrip": false,
680
+ "single_word": false,
681
+ "special": true
682
+ },
683
+ "256081": {
684
+ "content": "<extra_id_218>",
685
+ "lstrip": false,
686
+ "normalized": false,
687
+ "rstrip": false,
688
+ "single_word": false,
689
+ "special": true
690
+ },
691
+ "256082": {
692
+ "content": "<extra_id_217>",
693
+ "lstrip": false,
694
+ "normalized": false,
695
+ "rstrip": false,
696
+ "single_word": false,
697
+ "special": true
698
+ },
699
+ "256083": {
700
+ "content": "<extra_id_216>",
701
+ "lstrip": false,
702
+ "normalized": false,
703
+ "rstrip": false,
704
+ "single_word": false,
705
+ "special": true
706
+ },
707
+ "256084": {
708
+ "content": "<extra_id_215>",
709
+ "lstrip": false,
710
+ "normalized": false,
711
+ "rstrip": false,
712
+ "single_word": false,
713
+ "special": true
714
+ },
715
+ "256085": {
716
+ "content": "<extra_id_214>",
717
+ "lstrip": false,
718
+ "normalized": false,
719
+ "rstrip": false,
720
+ "single_word": false,
721
+ "special": true
722
+ },
723
+ "256086": {
724
+ "content": "<extra_id_213>",
725
+ "lstrip": false,
726
+ "normalized": false,
727
+ "rstrip": false,
728
+ "single_word": false,
729
+ "special": true
730
+ },
731
+ "256087": {
732
+ "content": "<extra_id_212>",
733
+ "lstrip": false,
734
+ "normalized": false,
735
+ "rstrip": false,
736
+ "single_word": false,
737
+ "special": true
738
+ },
739
+ "256088": {
740
+ "content": "<extra_id_211>",
741
+ "lstrip": false,
742
+ "normalized": false,
743
+ "rstrip": false,
744
+ "single_word": false,
745
+ "special": true
746
+ },
747
+ "256089": {
748
+ "content": "<extra_id_210>",
749
+ "lstrip": false,
750
+ "normalized": false,
751
+ "rstrip": false,
752
+ "single_word": false,
753
+ "special": true
754
+ },
755
+ "256090": {
756
+ "content": "<extra_id_209>",
757
+ "lstrip": false,
758
+ "normalized": false,
759
+ "rstrip": false,
760
+ "single_word": false,
761
+ "special": true
762
+ },
763
+ "256091": {
764
+ "content": "<extra_id_208>",
765
+ "lstrip": false,
766
+ "normalized": false,
767
+ "rstrip": false,
768
+ "single_word": false,
769
+ "special": true
770
+ },
771
+ "256092": {
772
+ "content": "<extra_id_207>",
773
+ "lstrip": false,
774
+ "normalized": false,
775
+ "rstrip": false,
776
+ "single_word": false,
777
+ "special": true
778
+ },
779
+ "256093": {
780
+ "content": "<extra_id_206>",
781
+ "lstrip": false,
782
+ "normalized": false,
783
+ "rstrip": false,
784
+ "single_word": false,
785
+ "special": true
786
+ },
787
+ "256094": {
788
+ "content": "<extra_id_205>",
789
+ "lstrip": false,
790
+ "normalized": false,
791
+ "rstrip": false,
792
+ "single_word": false,
793
+ "special": true
794
+ },
795
+ "256095": {
796
+ "content": "<extra_id_204>",
797
+ "lstrip": false,
798
+ "normalized": false,
799
+ "rstrip": false,
800
+ "single_word": false,
801
+ "special": true
802
+ },
803
+ "256096": {
804
+ "content": "<extra_id_203>",
805
+ "lstrip": false,
806
+ "normalized": false,
807
+ "rstrip": false,
808
+ "single_word": false,
809
+ "special": true
810
+ },
811
+ "256097": {
812
+ "content": "<extra_id_202>",
813
+ "lstrip": false,
814
+ "normalized": false,
815
+ "rstrip": false,
816
+ "single_word": false,
817
+ "special": true
818
+ },
819
+ "256098": {
820
+ "content": "<extra_id_201>",
821
+ "lstrip": false,
822
+ "normalized": false,
823
+ "rstrip": false,
824
+ "single_word": false,
825
+ "special": true
826
+ },
827
+ "256099": {
828
+ "content": "<extra_id_200>",
829
+ "lstrip": false,
830
+ "normalized": false,
831
+ "rstrip": false,
832
+ "single_word": false,
833
+ "special": true
834
+ },
835
+ "256100": {
836
+ "content": "<extra_id_199>",
837
+ "lstrip": false,
838
+ "normalized": false,
839
+ "rstrip": false,
840
+ "single_word": false,
841
+ "special": true
842
+ },
843
+ "256101": {
844
+ "content": "<extra_id_198>",
845
+ "lstrip": false,
846
+ "normalized": false,
847
+ "rstrip": false,
848
+ "single_word": false,
849
+ "special": true
850
+ },
851
+ "256102": {
852
+ "content": "<extra_id_197>",
853
+ "lstrip": false,
854
+ "normalized": false,
855
+ "rstrip": false,
856
+ "single_word": false,
857
+ "special": true
858
+ },
859
+ "256103": {
860
+ "content": "<extra_id_196>",
861
+ "lstrip": false,
862
+ "normalized": false,
863
+ "rstrip": false,
864
+ "single_word": false,
865
+ "special": true
866
+ },
867
+ "256104": {
868
+ "content": "<extra_id_195>",
869
+ "lstrip": false,
870
+ "normalized": false,
871
+ "rstrip": false,
872
+ "single_word": false,
873
+ "special": true
874
+ },
875
+ "256105": {
876
+ "content": "<extra_id_194>",
877
+ "lstrip": false,
878
+ "normalized": false,
879
+ "rstrip": false,
880
+ "single_word": false,
881
+ "special": true
882
+ },
883
+ "256106": {
884
+ "content": "<extra_id_193>",
885
+ "lstrip": false,
886
+ "normalized": false,
887
+ "rstrip": false,
888
+ "single_word": false,
889
+ "special": true
890
+ },
891
+ "256107": {
892
+ "content": "<extra_id_192>",
893
+ "lstrip": false,
894
+ "normalized": false,
895
+ "rstrip": false,
896
+ "single_word": false,
897
+ "special": true
898
+ },
899
+ "256108": {
900
+ "content": "<extra_id_191>",
901
+ "lstrip": false,
902
+ "normalized": false,
903
+ "rstrip": false,
904
+ "single_word": false,
905
+ "special": true
906
+ },
907
+ "256109": {
908
+ "content": "<extra_id_190>",
909
+ "lstrip": false,
910
+ "normalized": false,
911
+ "rstrip": false,
912
+ "single_word": false,
913
+ "special": true
914
+ },
915
+ "256110": {
916
+ "content": "<extra_id_189>",
917
+ "lstrip": false,
918
+ "normalized": false,
919
+ "rstrip": false,
920
+ "single_word": false,
921
+ "special": true
922
+ },
923
+ "256111": {
924
+ "content": "<extra_id_188>",
925
+ "lstrip": false,
926
+ "normalized": false,
927
+ "rstrip": false,
928
+ "single_word": false,
929
+ "special": true
930
+ },
931
+ "256112": {
932
+ "content": "<extra_id_187>",
933
+ "lstrip": false,
934
+ "normalized": false,
935
+ "rstrip": false,
936
+ "single_word": false,
937
+ "special": true
938
+ },
939
+ "256113": {
940
+ "content": "<extra_id_186>",
941
+ "lstrip": false,
942
+ "normalized": false,
943
+ "rstrip": false,
944
+ "single_word": false,
945
+ "special": true
946
+ },
947
+ "256114": {
948
+ "content": "<extra_id_185>",
949
+ "lstrip": false,
950
+ "normalized": false,
951
+ "rstrip": false,
952
+ "single_word": false,
953
+ "special": true
954
+ },
955
+ "256115": {
956
+ "content": "<extra_id_184>",
957
+ "lstrip": false,
958
+ "normalized": false,
959
+ "rstrip": false,
960
+ "single_word": false,
961
+ "special": true
962
+ },
963
+ "256116": {
964
+ "content": "<extra_id_183>",
965
+ "lstrip": false,
966
+ "normalized": false,
967
+ "rstrip": false,
968
+ "single_word": false,
969
+ "special": true
970
+ },
971
+ "256117": {
972
+ "content": "<extra_id_182>",
973
+ "lstrip": false,
974
+ "normalized": false,
975
+ "rstrip": false,
976
+ "single_word": false,
977
+ "special": true
978
+ },
979
+ "256118": {
980
+ "content": "<extra_id_181>",
981
+ "lstrip": false,
982
+ "normalized": false,
983
+ "rstrip": false,
984
+ "single_word": false,
985
+ "special": true
986
+ },
987
+ "256119": {
988
+ "content": "<extra_id_180>",
989
+ "lstrip": false,
990
+ "normalized": false,
991
+ "rstrip": false,
992
+ "single_word": false,
993
+ "special": true
994
+ },
995
+ "256120": {
996
+ "content": "<extra_id_179>",
997
+ "lstrip": false,
998
+ "normalized": false,
999
+ "rstrip": false,
1000
+ "single_word": false,
1001
+ "special": true
1002
+ },
1003
+ "256121": {
1004
+ "content": "<extra_id_178>",
1005
+ "lstrip": false,
1006
+ "normalized": false,
1007
+ "rstrip": false,
1008
+ "single_word": false,
1009
+ "special": true
1010
+ },
1011
+ "256122": {
1012
+ "content": "<extra_id_177>",
1013
+ "lstrip": false,
1014
+ "normalized": false,
1015
+ "rstrip": false,
1016
+ "single_word": false,
1017
+ "special": true
1018
+ },
1019
+ "256123": {
1020
+ "content": "<extra_id_176>",
1021
+ "lstrip": false,
1022
+ "normalized": false,
1023
+ "rstrip": false,
1024
+ "single_word": false,
1025
+ "special": true
1026
+ },
1027
+ "256124": {
1028
+ "content": "<extra_id_175>",
1029
+ "lstrip": false,
1030
+ "normalized": false,
1031
+ "rstrip": false,
1032
+ "single_word": false,
1033
+ "special": true
1034
+ },
1035
+ "256125": {
1036
+ "content": "<extra_id_174>",
1037
+ "lstrip": false,
1038
+ "normalized": false,
1039
+ "rstrip": false,
1040
+ "single_word": false,
1041
+ "special": true
1042
+ },
1043
+ "256126": {
1044
+ "content": "<extra_id_173>",
1045
+ "lstrip": false,
1046
+ "normalized": false,
1047
+ "rstrip": false,
1048
+ "single_word": false,
1049
+ "special": true
1050
+ },
1051
+ "256127": {
1052
+ "content": "<extra_id_172>",
1053
+ "lstrip": false,
1054
+ "normalized": false,
1055
+ "rstrip": false,
1056
+ "single_word": false,
1057
+ "special": true
1058
+ },
1059
+ "256128": {
1060
+ "content": "<extra_id_171>",
1061
+ "lstrip": false,
1062
+ "normalized": false,
1063
+ "rstrip": false,
1064
+ "single_word": false,
1065
+ "special": true
1066
+ },
1067
+ "256129": {
1068
+ "content": "<extra_id_170>",
1069
+ "lstrip": false,
1070
+ "normalized": false,
1071
+ "rstrip": false,
1072
+ "single_word": false,
1073
+ "special": true
1074
+ },
1075
+ "256130": {
1076
+ "content": "<extra_id_169>",
1077
+ "lstrip": false,
1078
+ "normalized": false,
1079
+ "rstrip": false,
1080
+ "single_word": false,
1081
+ "special": true
1082
+ },
1083
+ "256131": {
1084
+ "content": "<extra_id_168>",
1085
+ "lstrip": false,
1086
+ "normalized": false,
1087
+ "rstrip": false,
1088
+ "single_word": false,
1089
+ "special": true
1090
+ },
1091
+ "256132": {
1092
+ "content": "<extra_id_167>",
1093
+ "lstrip": false,
1094
+ "normalized": false,
1095
+ "rstrip": false,
1096
+ "single_word": false,
1097
+ "special": true
1098
+ },
1099
+ "256133": {
1100
+ "content": "<extra_id_166>",
1101
+ "lstrip": false,
1102
+ "normalized": false,
1103
+ "rstrip": false,
1104
+ "single_word": false,
1105
+ "special": true
1106
+ },
1107
+ "256134": {
1108
+ "content": "<extra_id_165>",
1109
+ "lstrip": false,
1110
+ "normalized": false,
1111
+ "rstrip": false,
1112
+ "single_word": false,
1113
+ "special": true
1114
+ },
1115
+ "256135": {
1116
+ "content": "<extra_id_164>",
1117
+ "lstrip": false,
1118
+ "normalized": false,
1119
+ "rstrip": false,
1120
+ "single_word": false,
1121
+ "special": true
1122
+ },
1123
+ "256136": {
1124
+ "content": "<extra_id_163>",
1125
+ "lstrip": false,
1126
+ "normalized": false,
1127
+ "rstrip": false,
1128
+ "single_word": false,
1129
+ "special": true
1130
+ },
1131
+ "256137": {
1132
+ "content": "<extra_id_162>",
1133
+ "lstrip": false,
1134
+ "normalized": false,
1135
+ "rstrip": false,
1136
+ "single_word": false,
1137
+ "special": true
1138
+ },
1139
+ "256138": {
1140
+ "content": "<extra_id_161>",
1141
+ "lstrip": false,
1142
+ "normalized": false,
1143
+ "rstrip": false,
1144
+ "single_word": false,
1145
+ "special": true
1146
+ },
1147
+ "256139": {
1148
+ "content": "<extra_id_160>",
1149
+ "lstrip": false,
1150
+ "normalized": false,
1151
+ "rstrip": false,
1152
+ "single_word": false,
1153
+ "special": true
1154
+ },
1155
+ "256140": {
1156
+ "content": "<extra_id_159>",
1157
+ "lstrip": false,
1158
+ "normalized": false,
1159
+ "rstrip": false,
1160
+ "single_word": false,
1161
+ "special": true
1162
+ },
1163
+ "256141": {
1164
+ "content": "<extra_id_158>",
1165
+ "lstrip": false,
1166
+ "normalized": false,
1167
+ "rstrip": false,
1168
+ "single_word": false,
1169
+ "special": true
1170
+ },
1171
+ "256142": {
1172
+ "content": "<extra_id_157>",
1173
+ "lstrip": false,
1174
+ "normalized": false,
1175
+ "rstrip": false,
1176
+ "single_word": false,
1177
+ "special": true
1178
+ },
1179
+ "256143": {
1180
+ "content": "<extra_id_156>",
1181
+ "lstrip": false,
1182
+ "normalized": false,
1183
+ "rstrip": false,
1184
+ "single_word": false,
1185
+ "special": true
1186
+ },
1187
+ "256144": {
1188
+ "content": "<extra_id_155>",
1189
+ "lstrip": false,
1190
+ "normalized": false,
1191
+ "rstrip": false,
1192
+ "single_word": false,
1193
+ "special": true
1194
+ },
1195
+ "256145": {
1196
+ "content": "<extra_id_154>",
1197
+ "lstrip": false,
1198
+ "normalized": false,
1199
+ "rstrip": false,
1200
+ "single_word": false,
1201
+ "special": true
1202
+ },
1203
+ "256146": {
1204
+ "content": "<extra_id_153>",
1205
+ "lstrip": false,
1206
+ "normalized": false,
1207
+ "rstrip": false,
1208
+ "single_word": false,
1209
+ "special": true
1210
+ },
1211
+ "256147": {
1212
+ "content": "<extra_id_152>",
1213
+ "lstrip": false,
1214
+ "normalized": false,
1215
+ "rstrip": false,
1216
+ "single_word": false,
1217
+ "special": true
1218
+ },
1219
+ "256148": {
1220
+ "content": "<extra_id_151>",
1221
+ "lstrip": false,
1222
+ "normalized": false,
1223
+ "rstrip": false,
1224
+ "single_word": false,
1225
+ "special": true
1226
+ },
1227
+ "256149": {
1228
+ "content": "<extra_id_150>",
1229
+ "lstrip": false,
1230
+ "normalized": false,
1231
+ "rstrip": false,
1232
+ "single_word": false,
1233
+ "special": true
1234
+ },
1235
+ "256150": {
1236
+ "content": "<extra_id_149>",
1237
+ "lstrip": false,
1238
+ "normalized": false,
1239
+ "rstrip": false,
1240
+ "single_word": false,
1241
+ "special": true
1242
+ },
1243
+ "256151": {
1244
+ "content": "<extra_id_148>",
1245
+ "lstrip": false,
1246
+ "normalized": false,
1247
+ "rstrip": false,
1248
+ "single_word": false,
1249
+ "special": true
1250
+ },
1251
+ "256152": {
1252
+ "content": "<extra_id_147>",
1253
+ "lstrip": false,
1254
+ "normalized": false,
1255
+ "rstrip": false,
1256
+ "single_word": false,
1257
+ "special": true
1258
+ },
1259
+ "256153": {
1260
+ "content": "<extra_id_146>",
1261
+ "lstrip": false,
1262
+ "normalized": false,
1263
+ "rstrip": false,
1264
+ "single_word": false,
1265
+ "special": true
1266
+ },
1267
+ "256154": {
1268
+ "content": "<extra_id_145>",
1269
+ "lstrip": false,
1270
+ "normalized": false,
1271
+ "rstrip": false,
1272
+ "single_word": false,
1273
+ "special": true
1274
+ },
1275
+ "256155": {
1276
+ "content": "<extra_id_144>",
1277
+ "lstrip": false,
1278
+ "normalized": false,
1279
+ "rstrip": false,
1280
+ "single_word": false,
1281
+ "special": true
1282
+ },
1283
+ "256156": {
1284
+ "content": "<extra_id_143>",
1285
+ "lstrip": false,
1286
+ "normalized": false,
1287
+ "rstrip": false,
1288
+ "single_word": false,
1289
+ "special": true
1290
+ },
1291
+ "256157": {
1292
+ "content": "<extra_id_142>",
1293
+ "lstrip": false,
1294
+ "normalized": false,
1295
+ "rstrip": false,
1296
+ "single_word": false,
1297
+ "special": true
1298
+ },
1299
+ "256158": {
1300
+ "content": "<extra_id_141>",
1301
+ "lstrip": false,
1302
+ "normalized": false,
1303
+ "rstrip": false,
1304
+ "single_word": false,
1305
+ "special": true
1306
+ },
1307
+ "256159": {
1308
+ "content": "<extra_id_140>",
1309
+ "lstrip": false,
1310
+ "normalized": false,
1311
+ "rstrip": false,
1312
+ "single_word": false,
1313
+ "special": true
1314
+ },
1315
+ "256160": {
1316
+ "content": "<extra_id_139>",
1317
+ "lstrip": false,
1318
+ "normalized": false,
1319
+ "rstrip": false,
1320
+ "single_word": false,
1321
+ "special": true
1322
+ },
1323
+ "256161": {
1324
+ "content": "<extra_id_138>",
1325
+ "lstrip": false,
1326
+ "normalized": false,
1327
+ "rstrip": false,
1328
+ "single_word": false,
1329
+ "special": true
1330
+ },
1331
+ "256162": {
1332
+ "content": "<extra_id_137>",
1333
+ "lstrip": false,
1334
+ "normalized": false,
1335
+ "rstrip": false,
1336
+ "single_word": false,
1337
+ "special": true
1338
+ },
1339
+ "256163": {
1340
+ "content": "<extra_id_136>",
1341
+ "lstrip": false,
1342
+ "normalized": false,
1343
+ "rstrip": false,
1344
+ "single_word": false,
1345
+ "special": true
1346
+ },
1347
+ "256164": {
1348
+ "content": "<extra_id_135>",
1349
+ "lstrip": false,
1350
+ "normalized": false,
1351
+ "rstrip": false,
1352
+ "single_word": false,
1353
+ "special": true
1354
+ },
1355
+ "256165": {
1356
+ "content": "<extra_id_134>",
1357
+ "lstrip": false,
1358
+ "normalized": false,
1359
+ "rstrip": false,
1360
+ "single_word": false,
1361
+ "special": true
1362
+ },
1363
+ "256166": {
1364
+ "content": "<extra_id_133>",
1365
+ "lstrip": false,
1366
+ "normalized": false,
1367
+ "rstrip": false,
1368
+ "single_word": false,
1369
+ "special": true
1370
+ },
1371
+ "256167": {
1372
+ "content": "<extra_id_132>",
1373
+ "lstrip": false,
1374
+ "normalized": false,
1375
+ "rstrip": false,
1376
+ "single_word": false,
1377
+ "special": true
1378
+ },
1379
+ "256168": {
1380
+ "content": "<extra_id_131>",
1381
+ "lstrip": false,
1382
+ "normalized": false,
1383
+ "rstrip": false,
1384
+ "single_word": false,
1385
+ "special": true
1386
+ },
1387
+ "256169": {
1388
+ "content": "<extra_id_130>",
1389
+ "lstrip": false,
1390
+ "normalized": false,
1391
+ "rstrip": false,
1392
+ "single_word": false,
1393
+ "special": true
1394
+ },
1395
+ "256170": {
1396
+ "content": "<extra_id_129>",
1397
+ "lstrip": false,
1398
+ "normalized": false,
1399
+ "rstrip": false,
1400
+ "single_word": false,
1401
+ "special": true
1402
+ },
1403
+ "256171": {
1404
+ "content": "<extra_id_128>",
1405
+ "lstrip": false,
1406
+ "normalized": false,
1407
+ "rstrip": false,
1408
+ "single_word": false,
1409
+ "special": true
1410
+ },
1411
+ "256172": {
1412
+ "content": "<extra_id_127>",
1413
+ "lstrip": false,
1414
+ "normalized": false,
1415
+ "rstrip": false,
1416
+ "single_word": false,
1417
+ "special": true
1418
+ },
1419
+ "256173": {
1420
+ "content": "<extra_id_126>",
1421
+ "lstrip": false,
1422
+ "normalized": false,
1423
+ "rstrip": false,
1424
+ "single_word": false,
1425
+ "special": true
1426
+ },
1427
+ "256174": {
1428
+ "content": "<extra_id_125>",
1429
+ "lstrip": false,
1430
+ "normalized": false,
1431
+ "rstrip": false,
1432
+ "single_word": false,
1433
+ "special": true
1434
+ },
1435
+ "256175": {
1436
+ "content": "<extra_id_124>",
1437
+ "lstrip": false,
1438
+ "normalized": false,
1439
+ "rstrip": false,
1440
+ "single_word": false,
1441
+ "special": true
1442
+ },
1443
+ "256176": {
1444
+ "content": "<extra_id_123>",
1445
+ "lstrip": false,
1446
+ "normalized": false,
1447
+ "rstrip": false,
1448
+ "single_word": false,
1449
+ "special": true
1450
+ },
1451
+ "256177": {
1452
+ "content": "<extra_id_122>",
1453
+ "lstrip": false,
1454
+ "normalized": false,
1455
+ "rstrip": false,
1456
+ "single_word": false,
1457
+ "special": true
1458
+ },
1459
+ "256178": {
1460
+ "content": "<extra_id_121>",
1461
+ "lstrip": false,
1462
+ "normalized": false,
1463
+ "rstrip": false,
1464
+ "single_word": false,
1465
+ "special": true
1466
+ },
1467
+ "256179": {
1468
+ "content": "<extra_id_120>",
1469
+ "lstrip": false,
1470
+ "normalized": false,
1471
+ "rstrip": false,
1472
+ "single_word": false,
1473
+ "special": true
1474
+ },
1475
+ "256180": {
1476
+ "content": "<extra_id_119>",
1477
+ "lstrip": false,
1478
+ "normalized": false,
1479
+ "rstrip": false,
1480
+ "single_word": false,
1481
+ "special": true
1482
+ },
1483
+ "256181": {
1484
+ "content": "<extra_id_118>",
1485
+ "lstrip": false,
1486
+ "normalized": false,
1487
+ "rstrip": false,
1488
+ "single_word": false,
1489
+ "special": true
1490
+ },
1491
+ "256182": {
1492
+ "content": "<extra_id_117>",
1493
+ "lstrip": false,
1494
+ "normalized": false,
1495
+ "rstrip": false,
1496
+ "single_word": false,
1497
+ "special": true
1498
+ },
1499
+ "256183": {
1500
+ "content": "<extra_id_116>",
1501
+ "lstrip": false,
1502
+ "normalized": false,
1503
+ "rstrip": false,
1504
+ "single_word": false,
1505
+ "special": true
1506
+ },
1507
+ "256184": {
1508
+ "content": "<extra_id_115>",
1509
+ "lstrip": false,
1510
+ "normalized": false,
1511
+ "rstrip": false,
1512
+ "single_word": false,
1513
+ "special": true
1514
+ },
1515
+ "256185": {
1516
+ "content": "<extra_id_114>",
1517
+ "lstrip": false,
1518
+ "normalized": false,
1519
+ "rstrip": false,
1520
+ "single_word": false,
1521
+ "special": true
1522
+ },
1523
+ "256186": {
1524
+ "content": "<extra_id_113>",
1525
+ "lstrip": false,
1526
+ "normalized": false,
1527
+ "rstrip": false,
1528
+ "single_word": false,
1529
+ "special": true
1530
+ },
1531
+ "256187": {
1532
+ "content": "<extra_id_112>",
1533
+ "lstrip": false,
1534
+ "normalized": false,
1535
+ "rstrip": false,
1536
+ "single_word": false,
1537
+ "special": true
1538
+ },
1539
+ "256188": {
1540
+ "content": "<extra_id_111>",
1541
+ "lstrip": false,
1542
+ "normalized": false,
1543
+ "rstrip": false,
1544
+ "single_word": false,
1545
+ "special": true
1546
+ },
1547
+ "256189": {
1548
+ "content": "<extra_id_110>",
1549
+ "lstrip": false,
1550
+ "normalized": false,
1551
+ "rstrip": false,
1552
+ "single_word": false,
1553
+ "special": true
1554
+ },
1555
+ "256190": {
1556
+ "content": "<extra_id_109>",
1557
+ "lstrip": false,
1558
+ "normalized": false,
1559
+ "rstrip": false,
1560
+ "single_word": false,
1561
+ "special": true
1562
+ },
1563
+ "256191": {
1564
+ "content": "<extra_id_108>",
1565
+ "lstrip": false,
1566
+ "normalized": false,
1567
+ "rstrip": false,
1568
+ "single_word": false,
1569
+ "special": true
1570
+ },
1571
+ "256192": {
1572
+ "content": "<extra_id_107>",
1573
+ "lstrip": false,
1574
+ "normalized": false,
1575
+ "rstrip": false,
1576
+ "single_word": false,
1577
+ "special": true
1578
+ },
1579
+ "256193": {
1580
+ "content": "<extra_id_106>",
1581
+ "lstrip": false,
1582
+ "normalized": false,
1583
+ "rstrip": false,
1584
+ "single_word": false,
1585
+ "special": true
1586
+ },
1587
+ "256194": {
1588
+ "content": "<extra_id_105>",
1589
+ "lstrip": false,
1590
+ "normalized": false,
1591
+ "rstrip": false,
1592
+ "single_word": false,
1593
+ "special": true
1594
+ },
1595
+ "256195": {
1596
+ "content": "<extra_id_104>",
1597
+ "lstrip": false,
1598
+ "normalized": false,
1599
+ "rstrip": false,
1600
+ "single_word": false,
1601
+ "special": true
1602
+ },
1603
+ "256196": {
1604
+ "content": "<extra_id_103>",
1605
+ "lstrip": false,
1606
+ "normalized": false,
1607
+ "rstrip": false,
1608
+ "single_word": false,
1609
+ "special": true
1610
+ },
1611
+ "256197": {
1612
+ "content": "<extra_id_102>",
1613
+ "lstrip": false,
1614
+ "normalized": false,
1615
+ "rstrip": false,
1616
+ "single_word": false,
1617
+ "special": true
1618
+ },
1619
+ "256198": {
1620
+ "content": "<extra_id_101>",
1621
+ "lstrip": false,
1622
+ "normalized": false,
1623
+ "rstrip": false,
1624
+ "single_word": false,
1625
+ "special": true
1626
+ },
1627
+ "256199": {
1628
+ "content": "<extra_id_100>",
1629
+ "lstrip": false,
1630
+ "normalized": false,
1631
+ "rstrip": false,
1632
+ "single_word": false,
1633
+ "special": true
1634
+ },
1635
+ "256200": {
1636
+ "content": "<extra_id_99>",
1637
+ "lstrip": false,
1638
+ "normalized": false,
1639
+ "rstrip": false,
1640
+ "single_word": false,
1641
+ "special": true
1642
+ },
1643
+ "256201": {
1644
+ "content": "<extra_id_98>",
1645
+ "lstrip": false,
1646
+ "normalized": false,
1647
+ "rstrip": false,
1648
+ "single_word": false,
1649
+ "special": true
1650
+ },
1651
+ "256202": {
1652
+ "content": "<extra_id_97>",
1653
+ "lstrip": false,
1654
+ "normalized": false,
1655
+ "rstrip": false,
1656
+ "single_word": false,
1657
+ "special": true
1658
+ },
1659
+ "256203": {
1660
+ "content": "<extra_id_96>",
1661
+ "lstrip": false,
1662
+ "normalized": false,
1663
+ "rstrip": false,
1664
+ "single_word": false,
1665
+ "special": true
1666
+ },
1667
+ "256204": {
1668
+ "content": "<extra_id_95>",
1669
+ "lstrip": false,
1670
+ "normalized": false,
1671
+ "rstrip": false,
1672
+ "single_word": false,
1673
+ "special": true
1674
+ },
1675
+ "256205": {
1676
+ "content": "<extra_id_94>",
1677
+ "lstrip": false,
1678
+ "normalized": false,
1679
+ "rstrip": false,
1680
+ "single_word": false,
1681
+ "special": true
1682
+ },
1683
+ "256206": {
1684
+ "content": "<extra_id_93>",
1685
+ "lstrip": false,
1686
+ "normalized": false,
1687
+ "rstrip": false,
1688
+ "single_word": false,
1689
+ "special": true
1690
+ },
1691
+ "256207": {
1692
+ "content": "<extra_id_92>",
1693
+ "lstrip": false,
1694
+ "normalized": false,
1695
+ "rstrip": false,
1696
+ "single_word": false,
1697
+ "special": true
1698
+ },
1699
+ "256208": {
1700
+ "content": "<extra_id_91>",
1701
+ "lstrip": false,
1702
+ "normalized": false,
1703
+ "rstrip": false,
1704
+ "single_word": false,
1705
+ "special": true
1706
+ },
1707
+ "256209": {
1708
+ "content": "<extra_id_90>",
1709
+ "lstrip": false,
1710
+ "normalized": false,
1711
+ "rstrip": false,
1712
+ "single_word": false,
1713
+ "special": true
1714
+ },
1715
+ "256210": {
1716
+ "content": "<extra_id_89>",
1717
+ "lstrip": false,
1718
+ "normalized": false,
1719
+ "rstrip": false,
1720
+ "single_word": false,
1721
+ "special": true
1722
+ },
1723
+ "256211": {
1724
+ "content": "<extra_id_88>",
1725
+ "lstrip": false,
1726
+ "normalized": false,
1727
+ "rstrip": false,
1728
+ "single_word": false,
1729
+ "special": true
1730
+ },
1731
+ "256212": {
1732
+ "content": "<extra_id_87>",
1733
+ "lstrip": false,
1734
+ "normalized": false,
1735
+ "rstrip": false,
1736
+ "single_word": false,
1737
+ "special": true
1738
+ },
1739
+ "256213": {
1740
+ "content": "<extra_id_86>",
1741
+ "lstrip": false,
1742
+ "normalized": false,
1743
+ "rstrip": false,
1744
+ "single_word": false,
1745
+ "special": true
1746
+ },
1747
+ "256214": {
1748
+ "content": "<extra_id_85>",
1749
+ "lstrip": false,
1750
+ "normalized": false,
1751
+ "rstrip": false,
1752
+ "single_word": false,
1753
+ "special": true
1754
+ },
1755
+ "256215": {
1756
+ "content": "<extra_id_84>",
1757
+ "lstrip": false,
1758
+ "normalized": false,
1759
+ "rstrip": false,
1760
+ "single_word": false,
1761
+ "special": true
1762
+ },
1763
+ "256216": {
1764
+ "content": "<extra_id_83>",
1765
+ "lstrip": false,
1766
+ "normalized": false,
1767
+ "rstrip": false,
1768
+ "single_word": false,
1769
+ "special": true
1770
+ },
1771
+ "256217": {
1772
+ "content": "<extra_id_82>",
1773
+ "lstrip": false,
1774
+ "normalized": false,
1775
+ "rstrip": false,
1776
+ "single_word": false,
1777
+ "special": true
1778
+ },
1779
+ "256218": {
1780
+ "content": "<extra_id_81>",
1781
+ "lstrip": false,
1782
+ "normalized": false,
1783
+ "rstrip": false,
1784
+ "single_word": false,
1785
+ "special": true
1786
+ },
1787
+ "256219": {
1788
+ "content": "<extra_id_80>",
1789
+ "lstrip": false,
1790
+ "normalized": false,
1791
+ "rstrip": false,
1792
+ "single_word": false,
1793
+ "special": true
1794
+ },
1795
+ "256220": {
1796
+ "content": "<extra_id_79>",
1797
+ "lstrip": false,
1798
+ "normalized": false,
1799
+ "rstrip": false,
1800
+ "single_word": false,
1801
+ "special": true
1802
+ },
1803
+ "256221": {
1804
+ "content": "<extra_id_78>",
1805
+ "lstrip": false,
1806
+ "normalized": false,
1807
+ "rstrip": false,
1808
+ "single_word": false,
1809
+ "special": true
1810
+ },
1811
+ "256222": {
1812
+ "content": "<extra_id_77>",
1813
+ "lstrip": false,
1814
+ "normalized": false,
1815
+ "rstrip": false,
1816
+ "single_word": false,
1817
+ "special": true
1818
+ },
1819
+ "256223": {
1820
+ "content": "<extra_id_76>",
1821
+ "lstrip": false,
1822
+ "normalized": false,
1823
+ "rstrip": false,
1824
+ "single_word": false,
1825
+ "special": true
1826
+ },
1827
+ "256224": {
1828
+ "content": "<extra_id_75>",
1829
+ "lstrip": false,
1830
+ "normalized": false,
1831
+ "rstrip": false,
1832
+ "single_word": false,
1833
+ "special": true
1834
+ },
1835
+ "256225": {
1836
+ "content": "<extra_id_74>",
1837
+ "lstrip": false,
1838
+ "normalized": false,
1839
+ "rstrip": false,
1840
+ "single_word": false,
1841
+ "special": true
1842
+ },
1843
+ "256226": {
1844
+ "content": "<extra_id_73>",
1845
+ "lstrip": false,
1846
+ "normalized": false,
1847
+ "rstrip": false,
1848
+ "single_word": false,
1849
+ "special": true
1850
+ },
1851
+ "256227": {
1852
+ "content": "<extra_id_72>",
1853
+ "lstrip": false,
1854
+ "normalized": false,
1855
+ "rstrip": false,
1856
+ "single_word": false,
1857
+ "special": true
1858
+ },
1859
+ "256228": {
1860
+ "content": "<extra_id_71>",
1861
+ "lstrip": false,
1862
+ "normalized": false,
1863
+ "rstrip": false,
1864
+ "single_word": false,
1865
+ "special": true
1866
+ },
1867
+ "256229": {
1868
+ "content": "<extra_id_70>",
1869
+ "lstrip": false,
1870
+ "normalized": false,
1871
+ "rstrip": false,
1872
+ "single_word": false,
1873
+ "special": true
1874
+ },
1875
+ "256230": {
1876
+ "content": "<extra_id_69>",
1877
+ "lstrip": false,
1878
+ "normalized": false,
1879
+ "rstrip": false,
1880
+ "single_word": false,
1881
+ "special": true
1882
+ },
1883
+ "256231": {
1884
+ "content": "<extra_id_68>",
1885
+ "lstrip": false,
1886
+ "normalized": false,
1887
+ "rstrip": false,
1888
+ "single_word": false,
1889
+ "special": true
1890
+ },
1891
+ "256232": {
1892
+ "content": "<extra_id_67>",
1893
+ "lstrip": false,
1894
+ "normalized": false,
1895
+ "rstrip": false,
1896
+ "single_word": false,
1897
+ "special": true
1898
+ },
1899
+ "256233": {
1900
+ "content": "<extra_id_66>",
1901
+ "lstrip": false,
1902
+ "normalized": false,
1903
+ "rstrip": false,
1904
+ "single_word": false,
1905
+ "special": true
1906
+ },
1907
+ "256234": {
1908
+ "content": "<extra_id_65>",
1909
+ "lstrip": false,
1910
+ "normalized": false,
1911
+ "rstrip": false,
1912
+ "single_word": false,
1913
+ "special": true
1914
+ },
1915
+ "256235": {
1916
+ "content": "<extra_id_64>",
1917
+ "lstrip": false,
1918
+ "normalized": false,
1919
+ "rstrip": false,
1920
+ "single_word": false,
1921
+ "special": true
1922
+ },
1923
+ "256236": {
1924
+ "content": "<extra_id_63>",
1925
+ "lstrip": false,
1926
+ "normalized": false,
1927
+ "rstrip": false,
1928
+ "single_word": false,
1929
+ "special": true
1930
+ },
1931
+ "256237": {
1932
+ "content": "<extra_id_62>",
1933
+ "lstrip": false,
1934
+ "normalized": false,
1935
+ "rstrip": false,
1936
+ "single_word": false,
1937
+ "special": true
1938
+ },
1939
+ "256238": {
1940
+ "content": "<extra_id_61>",
1941
+ "lstrip": false,
1942
+ "normalized": false,
1943
+ "rstrip": false,
1944
+ "single_word": false,
1945
+ "special": true
1946
+ },
1947
+ "256239": {
1948
+ "content": "<extra_id_60>",
1949
+ "lstrip": false,
1950
+ "normalized": false,
1951
+ "rstrip": false,
1952
+ "single_word": false,
1953
+ "special": true
1954
+ },
1955
+ "256240": {
1956
+ "content": "<extra_id_59>",
1957
+ "lstrip": false,
1958
+ "normalized": false,
1959
+ "rstrip": false,
1960
+ "single_word": false,
1961
+ "special": true
1962
+ },
1963
+ "256241": {
1964
+ "content": "<extra_id_58>",
1965
+ "lstrip": false,
1966
+ "normalized": false,
1967
+ "rstrip": false,
1968
+ "single_word": false,
1969
+ "special": true
1970
+ },
1971
+ "256242": {
1972
+ "content": "<extra_id_57>",
1973
+ "lstrip": false,
1974
+ "normalized": false,
1975
+ "rstrip": false,
1976
+ "single_word": false,
1977
+ "special": true
1978
+ },
1979
+ "256243": {
1980
+ "content": "<extra_id_56>",
1981
+ "lstrip": false,
1982
+ "normalized": false,
1983
+ "rstrip": false,
1984
+ "single_word": false,
1985
+ "special": true
1986
+ },
1987
+ "256244": {
1988
+ "content": "<extra_id_55>",
1989
+ "lstrip": false,
1990
+ "normalized": false,
1991
+ "rstrip": false,
1992
+ "single_word": false,
1993
+ "special": true
1994
+ },
1995
+ "256245": {
1996
+ "content": "<extra_id_54>",
1997
+ "lstrip": false,
1998
+ "normalized": false,
1999
+ "rstrip": false,
2000
+ "single_word": false,
2001
+ "special": true
2002
+ },
2003
+ "256246": {
2004
+ "content": "<extra_id_53>",
2005
+ "lstrip": false,
2006
+ "normalized": false,
2007
+ "rstrip": false,
2008
+ "single_word": false,
2009
+ "special": true
2010
+ },
2011
+ "256247": {
2012
+ "content": "<extra_id_52>",
2013
+ "lstrip": false,
2014
+ "normalized": false,
2015
+ "rstrip": false,
2016
+ "single_word": false,
2017
+ "special": true
2018
+ },
2019
+ "256248": {
2020
+ "content": "<extra_id_51>",
2021
+ "lstrip": false,
2022
+ "normalized": false,
2023
+ "rstrip": false,
2024
+ "single_word": false,
2025
+ "special": true
2026
+ },
2027
+ "256249": {
2028
+ "content": "<extra_id_50>",
2029
+ "lstrip": false,
2030
+ "normalized": false,
2031
+ "rstrip": false,
2032
+ "single_word": false,
2033
+ "special": true
2034
+ },
2035
+ "256250": {
2036
+ "content": "<extra_id_49>",
2037
+ "lstrip": false,
2038
+ "normalized": false,
2039
+ "rstrip": false,
2040
+ "single_word": false,
2041
+ "special": true
2042
+ },
2043
+ "256251": {
2044
+ "content": "<extra_id_48>",
2045
+ "lstrip": false,
2046
+ "normalized": false,
2047
+ "rstrip": false,
2048
+ "single_word": false,
2049
+ "special": true
2050
+ },
2051
+ "256252": {
2052
+ "content": "<extra_id_47>",
2053
+ "lstrip": false,
2054
+ "normalized": false,
2055
+ "rstrip": false,
2056
+ "single_word": false,
2057
+ "special": true
2058
+ },
2059
+ "256253": {
2060
+ "content": "<extra_id_46>",
2061
+ "lstrip": false,
2062
+ "normalized": false,
2063
+ "rstrip": false,
2064
+ "single_word": false,
2065
+ "special": true
2066
+ },
2067
+ "256254": {
2068
+ "content": "<extra_id_45>",
2069
+ "lstrip": false,
2070
+ "normalized": false,
2071
+ "rstrip": false,
2072
+ "single_word": false,
2073
+ "special": true
2074
+ },
2075
+ "256255": {
2076
+ "content": "<extra_id_44>",
2077
+ "lstrip": false,
2078
+ "normalized": false,
2079
+ "rstrip": false,
2080
+ "single_word": false,
2081
+ "special": true
2082
+ },
2083
+ "256256": {
2084
+ "content": "<extra_id_43>",
2085
+ "lstrip": false,
2086
+ "normalized": false,
2087
+ "rstrip": false,
2088
+ "single_word": false,
2089
+ "special": true
2090
+ },
2091
+ "256257": {
2092
+ "content": "<extra_id_42>",
2093
+ "lstrip": false,
2094
+ "normalized": false,
2095
+ "rstrip": false,
2096
+ "single_word": false,
2097
+ "special": true
2098
+ },
2099
+ "256258": {
2100
+ "content": "<extra_id_41>",
2101
+ "lstrip": false,
2102
+ "normalized": false,
2103
+ "rstrip": false,
2104
+ "single_word": false,
2105
+ "special": true
2106
+ },
2107
+ "256259": {
2108
+ "content": "<extra_id_40>",
2109
+ "lstrip": false,
2110
+ "normalized": false,
2111
+ "rstrip": false,
2112
+ "single_word": false,
2113
+ "special": true
2114
+ },
2115
+ "256260": {
2116
+ "content": "<extra_id_39>",
2117
+ "lstrip": false,
2118
+ "normalized": false,
2119
+ "rstrip": false,
2120
+ "single_word": false,
2121
+ "special": true
2122
+ },
2123
+ "256261": {
2124
+ "content": "<extra_id_38>",
2125
+ "lstrip": false,
2126
+ "normalized": false,
2127
+ "rstrip": false,
2128
+ "single_word": false,
2129
+ "special": true
2130
+ },
2131
+ "256262": {
2132
+ "content": "<extra_id_37>",
2133
+ "lstrip": false,
2134
+ "normalized": false,
2135
+ "rstrip": false,
2136
+ "single_word": false,
2137
+ "special": true
2138
+ },
2139
+ "256263": {
2140
+ "content": "<extra_id_36>",
2141
+ "lstrip": false,
2142
+ "normalized": false,
2143
+ "rstrip": false,
2144
+ "single_word": false,
2145
+ "special": true
2146
+ },
2147
+ "256264": {
2148
+ "content": "<extra_id_35>",
2149
+ "lstrip": false,
2150
+ "normalized": false,
2151
+ "rstrip": false,
2152
+ "single_word": false,
2153
+ "special": true
2154
+ },
2155
+ "256265": {
2156
+ "content": "<extra_id_34>",
2157
+ "lstrip": false,
2158
+ "normalized": false,
2159
+ "rstrip": false,
2160
+ "single_word": false,
2161
+ "special": true
2162
+ },
2163
+ "256266": {
2164
+ "content": "<extra_id_33>",
2165
+ "lstrip": false,
2166
+ "normalized": false,
2167
+ "rstrip": false,
2168
+ "single_word": false,
2169
+ "special": true
2170
+ },
2171
+ "256267": {
2172
+ "content": "<extra_id_32>",
2173
+ "lstrip": false,
2174
+ "normalized": false,
2175
+ "rstrip": false,
2176
+ "single_word": false,
2177
+ "special": true
2178
+ },
2179
+ "256268": {
2180
+ "content": "<extra_id_31>",
2181
+ "lstrip": false,
2182
+ "normalized": false,
2183
+ "rstrip": false,
2184
+ "single_word": false,
2185
+ "special": true
2186
+ },
2187
+ "256269": {
2188
+ "content": "<extra_id_30>",
2189
+ "lstrip": false,
2190
+ "normalized": false,
2191
+ "rstrip": false,
2192
+ "single_word": false,
2193
+ "special": true
2194
+ },
2195
+ "256270": {
2196
+ "content": "<extra_id_29>",
2197
+ "lstrip": false,
2198
+ "normalized": false,
2199
+ "rstrip": false,
2200
+ "single_word": false,
2201
+ "special": true
2202
+ },
2203
+ "256271": {
2204
+ "content": "<extra_id_28>",
2205
+ "lstrip": false,
2206
+ "normalized": false,
2207
+ "rstrip": false,
2208
+ "single_word": false,
2209
+ "special": true
2210
+ },
2211
+ "256272": {
2212
+ "content": "<extra_id_27>",
2213
+ "lstrip": false,
2214
+ "normalized": false,
2215
+ "rstrip": false,
2216
+ "single_word": false,
2217
+ "special": true
2218
+ },
2219
+ "256273": {
2220
+ "content": "<extra_id_26>",
2221
+ "lstrip": false,
2222
+ "normalized": false,
2223
+ "rstrip": false,
2224
+ "single_word": false,
2225
+ "special": true
2226
+ },
2227
+ "256274": {
2228
+ "content": "<extra_id_25>",
2229
+ "lstrip": false,
2230
+ "normalized": false,
2231
+ "rstrip": false,
2232
+ "single_word": false,
2233
+ "special": true
2234
+ },
2235
+ "256275": {
2236
+ "content": "<extra_id_24>",
2237
+ "lstrip": false,
2238
+ "normalized": false,
2239
+ "rstrip": false,
2240
+ "single_word": false,
2241
+ "special": true
2242
+ },
2243
+ "256276": {
2244
+ "content": "<extra_id_23>",
2245
+ "lstrip": false,
2246
+ "normalized": false,
2247
+ "rstrip": false,
2248
+ "single_word": false,
2249
+ "special": true
2250
+ },
2251
+ "256277": {
2252
+ "content": "<extra_id_22>",
2253
+ "lstrip": false,
2254
+ "normalized": false,
2255
+ "rstrip": false,
2256
+ "single_word": false,
2257
+ "special": true
2258
+ },
2259
+ "256278": {
2260
+ "content": "<extra_id_21>",
2261
+ "lstrip": false,
2262
+ "normalized": false,
2263
+ "rstrip": false,
2264
+ "single_word": false,
2265
+ "special": true
2266
+ },
2267
+ "256279": {
2268
+ "content": "<extra_id_20>",
2269
+ "lstrip": false,
2270
+ "normalized": false,
2271
+ "rstrip": false,
2272
+ "single_word": false,
2273
+ "special": true
2274
+ },
2275
+ "256280": {
2276
+ "content": "<extra_id_19>",
2277
+ "lstrip": false,
2278
+ "normalized": false,
2279
+ "rstrip": false,
2280
+ "single_word": false,
2281
+ "special": true
2282
+ },
2283
+ "256281": {
2284
+ "content": "<extra_id_18>",
2285
+ "lstrip": false,
2286
+ "normalized": false,
2287
+ "rstrip": false,
2288
+ "single_word": false,
2289
+ "special": true
2290
+ },
2291
+ "256282": {
2292
+ "content": "<extra_id_17>",
2293
+ "lstrip": false,
2294
+ "normalized": false,
2295
+ "rstrip": false,
2296
+ "single_word": false,
2297
+ "special": true
2298
+ },
2299
+ "256283": {
2300
+ "content": "<extra_id_16>",
2301
+ "lstrip": false,
2302
+ "normalized": false,
2303
+ "rstrip": false,
2304
+ "single_word": false,
2305
+ "special": true
2306
+ },
2307
+ "256284": {
2308
+ "content": "<extra_id_15>",
2309
+ "lstrip": false,
2310
+ "normalized": false,
2311
+ "rstrip": false,
2312
+ "single_word": false,
2313
+ "special": true
2314
+ },
2315
+ "256285": {
2316
+ "content": "<extra_id_14>",
2317
+ "lstrip": false,
2318
+ "normalized": false,
2319
+ "rstrip": false,
2320
+ "single_word": false,
2321
+ "special": true
2322
+ },
2323
+ "256286": {
2324
+ "content": "<extra_id_13>",
2325
+ "lstrip": false,
2326
+ "normalized": false,
2327
+ "rstrip": false,
2328
+ "single_word": false,
2329
+ "special": true
2330
+ },
2331
+ "256287": {
2332
+ "content": "<extra_id_12>",
2333
+ "lstrip": false,
2334
+ "normalized": false,
2335
+ "rstrip": false,
2336
+ "single_word": false,
2337
+ "special": true
2338
+ },
2339
+ "256288": {
2340
+ "content": "<extra_id_11>",
2341
+ "lstrip": false,
2342
+ "normalized": false,
2343
+ "rstrip": false,
2344
+ "single_word": false,
2345
+ "special": true
2346
+ },
2347
+ "256289": {
2348
+ "content": "<extra_id_10>",
2349
+ "lstrip": false,
2350
+ "normalized": false,
2351
+ "rstrip": false,
2352
+ "single_word": false,
2353
+ "special": true
2354
+ },
2355
+ "256290": {
2356
+ "content": "<extra_id_9>",
2357
+ "lstrip": false,
2358
+ "normalized": false,
2359
+ "rstrip": false,
2360
+ "single_word": false,
2361
+ "special": true
2362
+ },
2363
+ "256291": {
2364
+ "content": "<extra_id_8>",
2365
+ "lstrip": false,
2366
+ "normalized": false,
2367
+ "rstrip": false,
2368
+ "single_word": false,
2369
+ "special": true
2370
+ },
2371
+ "256292": {
2372
+ "content": "<extra_id_7>",
2373
+ "lstrip": false,
2374
+ "normalized": false,
2375
+ "rstrip": false,
2376
+ "single_word": false,
2377
+ "special": true
2378
+ },
2379
+ "256293": {
2380
+ "content": "<extra_id_6>",
2381
+ "lstrip": false,
2382
+ "normalized": false,
2383
+ "rstrip": false,
2384
+ "single_word": false,
2385
+ "special": true
2386
+ },
2387
+ "256294": {
2388
+ "content": "<extra_id_5>",
2389
+ "lstrip": false,
2390
+ "normalized": false,
2391
+ "rstrip": false,
2392
+ "single_word": false,
2393
+ "special": true
2394
+ },
2395
+ "256295": {
2396
+ "content": "<extra_id_4>",
2397
+ "lstrip": false,
2398
+ "normalized": false,
2399
+ "rstrip": false,
2400
+ "single_word": false,
2401
+ "special": true
2402
+ },
2403
+ "256296": {
2404
+ "content": "<extra_id_3>",
2405
+ "lstrip": false,
2406
+ "normalized": false,
2407
+ "rstrip": false,
2408
+ "single_word": false,
2409
+ "special": true
2410
+ },
2411
+ "256297": {
2412
+ "content": "<extra_id_2>",
2413
+ "lstrip": false,
2414
+ "normalized": false,
2415
+ "rstrip": false,
2416
+ "single_word": false,
2417
+ "special": true
2418
+ },
2419
+ "256298": {
2420
+ "content": "<extra_id_1>",
2421
+ "lstrip": false,
2422
+ "normalized": false,
2423
+ "rstrip": false,
2424
+ "single_word": false,
2425
+ "special": true
2426
+ },
2427
+ "256299": {
2428
+ "content": "<extra_id_0>",
2429
+ "lstrip": false,
2430
+ "normalized": false,
2431
+ "rstrip": false,
2432
+ "single_word": false,
2433
+ "special": true
2434
+ }
2435
+ },
2436
+ "additional_special_tokens": [
2437
+ "<extra_id_0>",
2438
+ "<extra_id_1>",
2439
+ "<extra_id_2>",
2440
+ "<extra_id_3>",
2441
+ "<extra_id_4>",
2442
+ "<extra_id_5>",
2443
+ "<extra_id_6>",
2444
+ "<extra_id_7>",
2445
+ "<extra_id_8>",
2446
+ "<extra_id_9>",
2447
+ "<extra_id_10>",
2448
+ "<extra_id_11>",
2449
+ "<extra_id_12>",
2450
+ "<extra_id_13>",
2451
+ "<extra_id_14>",
2452
+ "<extra_id_15>",
2453
+ "<extra_id_16>",
2454
+ "<extra_id_17>",
2455
+ "<extra_id_18>",
2456
+ "<extra_id_19>",
2457
+ "<extra_id_20>",
2458
+ "<extra_id_21>",
2459
+ "<extra_id_22>",
2460
+ "<extra_id_23>",
2461
+ "<extra_id_24>",
2462
+ "<extra_id_25>",
2463
+ "<extra_id_26>",
2464
+ "<extra_id_27>",
2465
+ "<extra_id_28>",
2466
+ "<extra_id_29>",
2467
+ "<extra_id_30>",
2468
+ "<extra_id_31>",
2469
+ "<extra_id_32>",
2470
+ "<extra_id_33>",
2471
+ "<extra_id_34>",
2472
+ "<extra_id_35>",
2473
+ "<extra_id_36>",
2474
+ "<extra_id_37>",
2475
+ "<extra_id_38>",
2476
+ "<extra_id_39>",
2477
+ "<extra_id_40>",
2478
+ "<extra_id_41>",
2479
+ "<extra_id_42>",
2480
+ "<extra_id_43>",
2481
+ "<extra_id_44>",
2482
+ "<extra_id_45>",
2483
+ "<extra_id_46>",
2484
+ "<extra_id_47>",
2485
+ "<extra_id_48>",
2486
+ "<extra_id_49>",
2487
+ "<extra_id_50>",
2488
+ "<extra_id_51>",
2489
+ "<extra_id_52>",
2490
+ "<extra_id_53>",
2491
+ "<extra_id_54>",
2492
+ "<extra_id_55>",
2493
+ "<extra_id_56>",
2494
+ "<extra_id_57>",
2495
+ "<extra_id_58>",
2496
+ "<extra_id_59>",
2497
+ "<extra_id_60>",
2498
+ "<extra_id_61>",
2499
+ "<extra_id_62>",
2500
+ "<extra_id_63>",
2501
+ "<extra_id_64>",
2502
+ "<extra_id_65>",
2503
+ "<extra_id_66>",
2504
+ "<extra_id_67>",
2505
+ "<extra_id_68>",
2506
+ "<extra_id_69>",
2507
+ "<extra_id_70>",
2508
+ "<extra_id_71>",
2509
+ "<extra_id_72>",
2510
+ "<extra_id_73>",
2511
+ "<extra_id_74>",
2512
+ "<extra_id_75>",
2513
+ "<extra_id_76>",
2514
+ "<extra_id_77>",
2515
+ "<extra_id_78>",
2516
+ "<extra_id_79>",
2517
+ "<extra_id_80>",
2518
+ "<extra_id_81>",
2519
+ "<extra_id_82>",
2520
+ "<extra_id_83>",
2521
+ "<extra_id_84>",
2522
+ "<extra_id_85>",
2523
+ "<extra_id_86>",
2524
+ "<extra_id_87>",
2525
+ "<extra_id_88>",
2526
+ "<extra_id_89>",
2527
+ "<extra_id_90>",
2528
+ "<extra_id_91>",
2529
+ "<extra_id_92>",
2530
+ "<extra_id_93>",
2531
+ "<extra_id_94>",
2532
+ "<extra_id_95>",
2533
+ "<extra_id_96>",
2534
+ "<extra_id_97>",
2535
+ "<extra_id_98>",
2536
+ "<extra_id_99>",
2537
+ "<extra_id_100>",
2538
+ "<extra_id_101>",
2539
+ "<extra_id_102>",
2540
+ "<extra_id_103>",
2541
+ "<extra_id_104>",
2542
+ "<extra_id_105>",
2543
+ "<extra_id_106>",
2544
+ "<extra_id_107>",
2545
+ "<extra_id_108>",
2546
+ "<extra_id_109>",
2547
+ "<extra_id_110>",
2548
+ "<extra_id_111>",
2549
+ "<extra_id_112>",
2550
+ "<extra_id_113>",
2551
+ "<extra_id_114>",
2552
+ "<extra_id_115>",
2553
+ "<extra_id_116>",
2554
+ "<extra_id_117>",
2555
+ "<extra_id_118>",
2556
+ "<extra_id_119>",
2557
+ "<extra_id_120>",
2558
+ "<extra_id_121>",
2559
+ "<extra_id_122>",
2560
+ "<extra_id_123>",
2561
+ "<extra_id_124>",
2562
+ "<extra_id_125>",
2563
+ "<extra_id_126>",
2564
+ "<extra_id_127>",
2565
+ "<extra_id_128>",
2566
+ "<extra_id_129>",
2567
+ "<extra_id_130>",
2568
+ "<extra_id_131>",
2569
+ "<extra_id_132>",
2570
+ "<extra_id_133>",
2571
+ "<extra_id_134>",
2572
+ "<extra_id_135>",
2573
+ "<extra_id_136>",
2574
+ "<extra_id_137>",
2575
+ "<extra_id_138>",
2576
+ "<extra_id_139>",
2577
+ "<extra_id_140>",
2578
+ "<extra_id_141>",
2579
+ "<extra_id_142>",
2580
+ "<extra_id_143>",
2581
+ "<extra_id_144>",
2582
+ "<extra_id_145>",
2583
+ "<extra_id_146>",
2584
+ "<extra_id_147>",
2585
+ "<extra_id_148>",
2586
+ "<extra_id_149>",
2587
+ "<extra_id_150>",
2588
+ "<extra_id_151>",
2589
+ "<extra_id_152>",
2590
+ "<extra_id_153>",
2591
+ "<extra_id_154>",
2592
+ "<extra_id_155>",
2593
+ "<extra_id_156>",
2594
+ "<extra_id_157>",
2595
+ "<extra_id_158>",
2596
+ "<extra_id_159>",
2597
+ "<extra_id_160>",
2598
+ "<extra_id_161>",
2599
+ "<extra_id_162>",
2600
+ "<extra_id_163>",
2601
+ "<extra_id_164>",
2602
+ "<extra_id_165>",
2603
+ "<extra_id_166>",
2604
+ "<extra_id_167>",
2605
+ "<extra_id_168>",
2606
+ "<extra_id_169>",
2607
+ "<extra_id_170>",
2608
+ "<extra_id_171>",
2609
+ "<extra_id_172>",
2610
+ "<extra_id_173>",
2611
+ "<extra_id_174>",
2612
+ "<extra_id_175>",
2613
+ "<extra_id_176>",
2614
+ "<extra_id_177>",
2615
+ "<extra_id_178>",
2616
+ "<extra_id_179>",
2617
+ "<extra_id_180>",
2618
+ "<extra_id_181>",
2619
+ "<extra_id_182>",
2620
+ "<extra_id_183>",
2621
+ "<extra_id_184>",
2622
+ "<extra_id_185>",
2623
+ "<extra_id_186>",
2624
+ "<extra_id_187>",
2625
+ "<extra_id_188>",
2626
+ "<extra_id_189>",
2627
+ "<extra_id_190>",
2628
+ "<extra_id_191>",
2629
+ "<extra_id_192>",
2630
+ "<extra_id_193>",
2631
+ "<extra_id_194>",
2632
+ "<extra_id_195>",
2633
+ "<extra_id_196>",
2634
+ "<extra_id_197>",
2635
+ "<extra_id_198>",
2636
+ "<extra_id_199>",
2637
+ "<extra_id_200>",
2638
+ "<extra_id_201>",
2639
+ "<extra_id_202>",
2640
+ "<extra_id_203>",
2641
+ "<extra_id_204>",
2642
+ "<extra_id_205>",
2643
+ "<extra_id_206>",
2644
+ "<extra_id_207>",
2645
+ "<extra_id_208>",
2646
+ "<extra_id_209>",
2647
+ "<extra_id_210>",
2648
+ "<extra_id_211>",
2649
+ "<extra_id_212>",
2650
+ "<extra_id_213>",
2651
+ "<extra_id_214>",
2652
+ "<extra_id_215>",
2653
+ "<extra_id_216>",
2654
+ "<extra_id_217>",
2655
+ "<extra_id_218>",
2656
+ "<extra_id_219>",
2657
+ "<extra_id_220>",
2658
+ "<extra_id_221>",
2659
+ "<extra_id_222>",
2660
+ "<extra_id_223>",
2661
+ "<extra_id_224>",
2662
+ "<extra_id_225>",
2663
+ "<extra_id_226>",
2664
+ "<extra_id_227>",
2665
+ "<extra_id_228>",
2666
+ "<extra_id_229>",
2667
+ "<extra_id_230>",
2668
+ "<extra_id_231>",
2669
+ "<extra_id_232>",
2670
+ "<extra_id_233>",
2671
+ "<extra_id_234>",
2672
+ "<extra_id_235>",
2673
+ "<extra_id_236>",
2674
+ "<extra_id_237>",
2675
+ "<extra_id_238>",
2676
+ "<extra_id_239>",
2677
+ "<extra_id_240>",
2678
+ "<extra_id_241>",
2679
+ "<extra_id_242>",
2680
+ "<extra_id_243>",
2681
+ "<extra_id_244>",
2682
+ "<extra_id_245>",
2683
+ "<extra_id_246>",
2684
+ "<extra_id_247>",
2685
+ "<extra_id_248>",
2686
+ "<extra_id_249>",
2687
+ "<extra_id_250>",
2688
+ "<extra_id_251>",
2689
+ "<extra_id_252>",
2690
+ "<extra_id_253>",
2691
+ "<extra_id_254>",
2692
+ "<extra_id_255>",
2693
+ "<extra_id_256>",
2694
+ "<extra_id_257>",
2695
+ "<extra_id_258>",
2696
+ "<extra_id_259>",
2697
+ "<extra_id_260>",
2698
+ "<extra_id_261>",
2699
+ "<extra_id_262>",
2700
+ "<extra_id_263>",
2701
+ "<extra_id_264>",
2702
+ "<extra_id_265>",
2703
+ "<extra_id_266>",
2704
+ "<extra_id_267>",
2705
+ "<extra_id_268>",
2706
+ "<extra_id_269>",
2707
+ "<extra_id_270>",
2708
+ "<extra_id_271>",
2709
+ "<extra_id_272>",
2710
+ "<extra_id_273>",
2711
+ "<extra_id_274>",
2712
+ "<extra_id_275>",
2713
+ "<extra_id_276>",
2714
+ "<extra_id_277>",
2715
+ "<extra_id_278>",
2716
+ "<extra_id_279>",
2717
+ "<extra_id_280>",
2718
+ "<extra_id_281>",
2719
+ "<extra_id_282>",
2720
+ "<extra_id_283>",
2721
+ "<extra_id_284>",
2722
+ "<extra_id_285>",
2723
+ "<extra_id_286>",
2724
+ "<extra_id_287>",
2725
+ "<extra_id_288>",
2726
+ "<extra_id_289>",
2727
+ "<extra_id_290>",
2728
+ "<extra_id_291>",
2729
+ "<extra_id_292>",
2730
+ "<extra_id_293>",
2731
+ "<extra_id_294>",
2732
+ "<extra_id_295>",
2733
+ "<extra_id_296>",
2734
+ "<extra_id_297>",
2735
+ "<extra_id_298>",
2736
+ "<extra_id_299>"
2737
+ ],
2738
+ "bos_token": "<s>",
2739
+ "clean_up_tokenization_spaces": true,
2740
+ "eos_token": "</s>",
2741
+ "extra_ids": 300,
2742
+ "model_max_length": 1000000000000000019884624838656,
2743
+ "pad_token": "<pad>",
2744
+ "sp_model_kwargs": {},
2745
+ "spaces_between_special_tokens": false,
2746
+ "tokenizer_class": "T5Tokenizer",
2747
+ "unk_token": "<unk>"
2748
+ }
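The config above registers 300 `<extra_id_*>` sentinel tokens for a `T5Tokenizer`. A minimal loading sketch, assuming the SentencePiece model file ships alongside these configs (the local path below is a placeholder):

```python
# Minimal sketch, assuming the umt5-xxl tokenizer files (including the
# SentencePiece model) sit in this local folder; the path is a placeholder.
from transformers import T5Tokenizer

tokenizer = T5Tokenizer.from_pretrained("Wan2.2-S2V-14B/google/umt5-xxl")
ids = tokenizer("a cat surfing at the beach", return_tensors="pt").input_ids
print(tokenizer.convert_ids_to_tokens(ids[0]))
```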
Wan2.2-S2V-14B/wav2vec2-large-xlsr-53-english/.msc ADDED
Binary file (2.33 kB). View file
 
Wan2.2-S2V-14B/wav2vec2-large-xlsr-53-english/.mv ADDED
@@ -0,0 +1 @@
1
+ Revision:master,CreatedAt:1730986758
Wan2.2-S2V-14B/wav2vec2-large-xlsr-53-english/README.md ADDED
@@ -0,0 +1,165 @@
1
+ ---
2
+ language: en
3
+ datasets:
4
+ - common_voice
5
+ - mozilla-foundation/common_voice_6_0
6
+ metrics:
7
+ - wer
8
+ - cer
9
+ tags:
10
+ - audio
11
+ - automatic-speech-recognition
12
+ - en
13
+ - hf-asr-leaderboard
14
+ - mozilla-foundation/common_voice_6_0
15
+ - robust-speech-event
16
+ - speech
17
+ - xlsr-fine-tuning-week
18
+ license: apache-2.0
19
+ model-index:
20
+ - name: XLSR Wav2Vec2 English by Jonatas Grosman
21
+ results:
22
+ - task:
23
+ name: Automatic Speech Recognition
24
+ type: automatic-speech-recognition
25
+ dataset:
26
+ name: Common Voice en
27
+ type: common_voice
28
+ args: en
29
+ metrics:
30
+ - name: Test WER
31
+ type: wer
32
+ value: 19.06
33
+ - name: Test CER
34
+ type: cer
35
+ value: 7.69
36
+ - name: Test WER (+LM)
37
+ type: wer
38
+ value: 14.81
39
+ - name: Test CER (+LM)
40
+ type: cer
41
+ value: 6.84
42
+ - task:
43
+ name: Automatic Speech Recognition
44
+ type: automatic-speech-recognition
45
+ dataset:
46
+ name: Robust Speech Event - Dev Data
47
+ type: speech-recognition-community-v2/dev_data
48
+ args: en
49
+ metrics:
50
+ - name: Dev WER
51
+ type: wer
52
+ value: 27.72
53
+ - name: Dev CER
54
+ type: cer
55
+ value: 11.65
56
+ - name: Dev WER (+LM)
57
+ type: wer
58
+ value: 20.85
59
+ - name: Dev CER (+LM)
60
+ type: cer
61
+ value: 11.01
62
+ ---
63
+
64
+ # Fine-tuned XLSR-53 large model for speech recognition in English
65
+
66
+ Fine-tuned [facebook/wav2vec2-large-xlsr-53](https://huggingface.co/facebook/wav2vec2-large-xlsr-53) on English using the train and validation splits of [Common Voice 6.1](https://huggingface.co/datasets/common_voice).
67
+ When using this model, make sure that your speech input is sampled at 16kHz.
68
+
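+ A minimal resampling sketch (assuming `librosa` is installed; the path is a placeholder):
+
+ ```python
+ # librosa resamples on load, so any input ends up at the 16 kHz the model expects.
+ import librosa
+
+ speech, sampling_rate = librosa.load("/path/to/file.mp3", sr=16_000)
+ ```
+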
69
+ This model has been fine-tuned thanks to the GPU credits generously given by the [OVHcloud](https://www.ovhcloud.com/en/public-cloud/ai-training/) :)
70
+
71
+ The script used for training can be found here: https://github.com/jonatasgrosman/wav2vec2-sprint
72
+
73
+ ## Usage
74
+
75
+ The model can be used directly (without a language model) as follows...
76
+
77
+ Using the [HuggingSound](https://github.com/jonatasgrosman/huggingsound) library:
78
+
79
+ ```python
80
+ from huggingsound import SpeechRecognitionModel
81
+
82
+ model = SpeechRecognitionModel("jonatasgrosman/wav2vec2-large-xlsr-53-english")
83
+ audio_paths = ["/path/to/file.mp3", "/path/to/another_file.wav"]
84
+
85
+ transcriptions = model.transcribe(audio_paths)
86
+ ```
87
+
88
+ Writing your own inference script:
89
+
90
+ ```python
91
+ import torch
92
+ import librosa
93
+ from datasets import load_dataset
94
+ from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor
95
+
96
+ LANG_ID = "en"
97
+ MODEL_ID = "jonatasgrosman/wav2vec2-large-xlsr-53-english"
98
+ SAMPLES = 10
99
+
100
+ test_dataset = load_dataset("common_voice", LANG_ID, split=f"test[:{SAMPLES}]")
101
+
102
+ processor = Wav2Vec2Processor.from_pretrained(MODEL_ID)
103
+ model = Wav2Vec2ForCTC.from_pretrained(MODEL_ID)
104
+
105
+ # Preprocessing the datasets.
106
+ # We need to read the audio files as arrays
107
+ def speech_file_to_array_fn(batch):
108
+ speech_array, sampling_rate = librosa.load(batch["path"], sr=16_000)
109
+ batch["speech"] = speech_array
110
+ batch["sentence"] = batch["sentence"].upper()
111
+ return batch
112
+
113
+ test_dataset = test_dataset.map(speech_file_to_array_fn)
114
+ inputs = processor(test_dataset["speech"], sampling_rate=16_000, return_tensors="pt", padding=True)
115
+
116
+ with torch.no_grad():
117
+ logits = model(inputs.input_values, attention_mask=inputs.attention_mask).logits
118
+
119
+ predicted_ids = torch.argmax(logits, dim=-1)
120
+ predicted_sentences = processor.batch_decode(predicted_ids)
121
+
122
+ for i, predicted_sentence in enumerate(predicted_sentences):
123
+ print("-" * 100)
124
+ print("Reference:", test_dataset[i]["sentence"])
125
+ print("Prediction:", predicted_sentence)
126
+ ```
127
+
128
+ | Reference | Prediction |
129
+ | ------------- | ------------- |
130
+ | "SHE'LL BE ALL RIGHT." | SHE'LL BE ALL RIGHT |
131
+ | SIX | SIX |
132
+ | "ALL'S WELL THAT ENDS WELL." | ALL AS WELL THAT ENDS WELL |
133
+ | DO YOU MEAN IT? | DO YOU MEAN IT |
134
+ | THE NEW PATCH IS LESS INVASIVE THAN THE OLD ONE, BUT STILL CAUSES REGRESSIONS. | THE NEW PATCH IS LESS INVASIVE THAN THE OLD ONE BUT STILL CAUSES REGRESSION |
135
+ | HOW IS MOZILLA GOING TO HANDLE AMBIGUITIES LIKE QUEUE AND CUE? | HOW IS MOSLILLAR GOING TO HANDLE ANDBEWOOTH HIS LIKE Q AND Q |
136
+ | "I GUESS YOU MUST THINK I'M KINDA BATTY." | RUSTIAN WASTIN PAN ONTE BATTLY |
137
+ | NO ONE NEAR THE REMOTE MACHINE YOU COULD RING? | NO ONE NEAR THE REMOTE MACHINE YOU COULD RING |
138
+ | SAUCE FOR THE GOOSE IS SAUCE FOR THE GANDER. | SAUCE FOR THE GUICE IS SAUCE FOR THE GONDER |
139
+ | GROVES STARTED WRITING SONGS WHEN SHE WAS FOUR YEARS OLD. | GRAFS STARTED WRITING SONGS WHEN SHE WAS FOUR YEARS OLD |
140
+
141
+ ## Evaluation
142
+
143
+ 1. To evaluate on `mozilla-foundation/common_voice_6_0` with split `test`
144
+
145
+ ```bash
146
+ python eval.py --model_id jonatasgrosman/wav2vec2-large-xlsr-53-english --dataset mozilla-foundation/common_voice_6_0 --config en --split test
147
+ ```
148
+
149
+ 2. To evaluate on `speech-recognition-community-v2/dev_data`
150
+
151
+ ```bash
152
+ python eval.py --model_id jonatasgrosman/wav2vec2-large-xlsr-53-english --dataset speech-recognition-community-v2/dev_data --config en --split validation --chunk_length_s 5.0 --stride_length_s 1.0
153
+ ```
154
+
155
+ ## Citation
156
+ If you want to cite this model, you can use this:
157
+
158
+ ```bibtex
159
+ @misc{grosman2021xlsr53-large-english,
160
+ title={Fine-tuned {XLSR}-53 large model for speech recognition in {E}nglish},
161
+ author={Grosman, Jonatas},
162
+ howpublished={\url{https://huggingface.co/jonatasgrosman/wav2vec2-large-xlsr-53-english}},
163
+ year={2021}
164
+ }
165
+ ```
Wan2.2-S2V-14B/wav2vec2-large-xlsr-53-english/alphabet.json ADDED
@@ -0,0 +1 @@
1
+ {"labels": ["", "<s>", "</s>", "⁇", " ", "'", "-", "a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m", "n", "o", "p", "q", "r", "s", "t", "u", "v", "w", "x", "y", "z"], "is_bpe": false}
Wan2.2-S2V-14B/wav2vec2-large-xlsr-53-english/config.json ADDED
@@ -0,0 +1,75 @@
1
+ {
2
+ "_name_or_path": "facebook/wav2vec2-large-xlsr-53",
3
+ "activation_dropout": 0.05,
4
+ "apply_spec_augment": true,
5
+ "architectures": [
6
+ "Wav2Vec2ForCTC"
7
+ ],
8
+ "attention_dropout": 0.1,
9
+ "bos_token_id": 1,
10
+ "conv_bias": true,
11
+ "conv_dim": [
12
+ 512,
13
+ 512,
14
+ 512,
15
+ 512,
16
+ 512,
17
+ 512,
18
+ 512
19
+ ],
20
+ "conv_kernel": [
21
+ 10,
22
+ 3,
23
+ 3,
24
+ 3,
25
+ 3,
26
+ 2,
27
+ 2
28
+ ],
29
+ "conv_stride": [
30
+ 5,
31
+ 2,
32
+ 2,
33
+ 2,
34
+ 2,
35
+ 2,
36
+ 2
37
+ ],
38
+ "ctc_loss_reduction": "mean",
39
+ "ctc_zero_infinity": true,
40
+ "do_stable_layer_norm": true,
41
+ "eos_token_id": 2,
42
+ "feat_extract_activation": "gelu",
43
+ "feat_extract_dropout": 0.0,
44
+ "feat_extract_norm": "layer",
45
+ "feat_proj_dropout": 0.05,
46
+ "final_dropout": 0.0,
47
+ "hidden_act": "gelu",
48
+ "hidden_dropout": 0.05,
49
+ "hidden_size": 1024,
50
+ "initializer_range": 0.02,
51
+ "intermediate_size": 4096,
52
+ "layer_norm_eps": 1e-05,
53
+ "layerdrop": 0.05,
54
+ "mask_channel_length": 10,
55
+ "mask_channel_min_space": 1,
56
+ "mask_channel_other": 0.0,
57
+ "mask_channel_prob": 0.0,
58
+ "mask_channel_selection": "static",
59
+ "mask_feature_length": 10,
60
+ "mask_feature_prob": 0.0,
61
+ "mask_time_length": 10,
62
+ "mask_time_min_space": 1,
63
+ "mask_time_other": 0.0,
64
+ "mask_time_prob": 0.05,
65
+ "mask_time_selection": "static",
66
+ "model_type": "wav2vec2",
67
+ "num_attention_heads": 16,
68
+ "num_conv_pos_embedding_groups": 16,
69
+ "num_conv_pos_embeddings": 128,
70
+ "num_feat_extract_layers": 7,
71
+ "num_hidden_layers": 24,
72
+ "pad_token_id": 0,
73
+ "transformers_version": "4.7.0.dev0",
74
+ "vocab_size": 33
75
+ }
Wan2.2-S2V-14B/wav2vec2-large-xlsr-53-english/configuration.json ADDED
@@ -0,0 +1 @@
1
+ {"framework": "pytorch", "task": "automatic-speech-recognition", "allow_remote": true}
Wan2.2-S2V-14B/wav2vec2-large-xlsr-53-english/eval.py ADDED
@@ -0,0 +1,164 @@
1
+ #!/usr/bin/env python3
2
+ from datasets import load_dataset, load_metric, Audio, Dataset
3
+ from transformers import pipeline, AutoFeatureExtractor, AutoTokenizer, AutoConfig, AutoModelForCTC, Wav2Vec2Processor, Wav2Vec2ProcessorWithLM
4
+ import re
5
+ import torch
6
+ import argparse
7
+ from typing import Dict
8
+
9
+ def log_results(result: Dataset, args: Dict[str, str]):
10
+ """ DO NOT CHANGE. This function computes and logs the result metrics. """
11
+
12
+ log_outputs = args.log_outputs
13
+ dataset_id = "_".join(args.dataset.split("/") + [args.config, args.split])
14
+
15
+ # load metric
16
+ wer = load_metric("wer")
17
+ cer = load_metric("cer")
18
+
19
+ # compute metrics
20
+ wer_result = wer.compute(references=result["target"], predictions=result["prediction"])
21
+ cer_result = cer.compute(references=result["target"], predictions=result["prediction"])
22
+
23
+ # print & log results
24
+ result_str = (
25
+ f"WER: {wer_result}\n"
26
+ f"CER: {cer_result}"
27
+ )
28
+ print(result_str)
29
+
30
+ with open(f"{dataset_id}_eval_results.txt", "w") as f:
31
+ f.write(result_str)
32
+
33
+ # log all results in text file. Possibly interesting for analysis
34
+ if log_outputs is not None:
35
+ pred_file = f"log_{dataset_id}_predictions.txt"
36
+ target_file = f"log_{dataset_id}_targets.txt"
37
+
38
+ with open(pred_file, "w") as p, open(target_file, "w") as t:
39
+
40
+ # mapping function to write output
41
+ def write_to_file(batch, i):
42
+ p.write(f"{i}" + "\n")
43
+ p.write(batch["prediction"] + "\n")
44
+ t.write(f"{i}" + "\n")
45
+ t.write(batch["target"] + "\n")
46
+
47
+ result.map(write_to_file, with_indices=True)
48
+
49
+
50
+ def normalize_text(text: str, invalid_chars_regex: str, to_lower: bool) -> str:
51
+ """ DO ADAPT FOR YOUR USE CASE. this function normalizes the target text. """
52
+
53
+ text = text.lower() if to_lower else text.upper()
54
+
55
+ text = re.sub(invalid_chars_regex, " ", text)
56
+
57
+ text = re.sub("\s+", " ", text).strip()
58
+
59
+ return text
60
+
61
+
62
+ def main(args):
63
+ # load dataset
64
+ dataset = load_dataset(args.dataset, args.config, split=args.split, use_auth_token=True)
65
+
66
+ # for testing: only process the first few examples as a test
67
+ # dataset = dataset.select(range(10))
68
+
69
+ # load processor
70
+ if args.greedy:
71
+ processor = Wav2Vec2Processor.from_pretrained(args.model_id)
72
+ decoder = None
73
+ else:
74
+ processor = Wav2Vec2ProcessorWithLM.from_pretrained(args.model_id)
75
+ decoder = processor.decoder
76
+
77
+ feature_extractor = processor.feature_extractor
78
+ tokenizer = processor.tokenizer
79
+
80
+ # resample audio
81
+ dataset = dataset.cast_column("audio", Audio(sampling_rate=feature_extractor.sampling_rate))
82
+
83
+ # load eval pipeline
84
+ if args.device is None:
85
+ args.device = 0 if torch.cuda.is_available() else -1
86
+
87
+ config = AutoConfig.from_pretrained(args.model_id)
88
+ model = AutoModelForCTC.from_pretrained(args.model_id)
89
+
90
+ #asr = pipeline("automatic-speech-recognition", model=args.model_id, device=args.device)
91
+ asr = pipeline("automatic-speech-recognition", config=config, model=model, tokenizer=tokenizer,
92
+ feature_extractor=feature_extractor, decoder=decoder, device=args.device)
93
+
94
+ # build normalizer config
95
+ tokenizer = AutoTokenizer.from_pretrained(args.model_id)
96
+ tokens = [x for x in tokenizer.convert_ids_to_tokens(range(0, tokenizer.vocab_size))]
97
+ special_tokens = [
98
+ tokenizer.pad_token, tokenizer.word_delimiter_token,
99
+ tokenizer.unk_token, tokenizer.bos_token,
100
+ tokenizer.eos_token,
101
+ ]
102
+ non_special_tokens = [x for x in tokens if x not in special_tokens]
103
+ invalid_chars_regex = f"[^\s{re.escape(''.join(set(non_special_tokens)))}]"
104
+ normalize_to_lower = False
105
+ for token in non_special_tokens:
106
+ if token.isalpha() and token.islower():
107
+ normalize_to_lower = True
108
+ break
109
+
110
+ # map function to decode audio
111
+ def map_to_pred(batch, args=args, asr=asr, invalid_chars_regex=invalid_chars_regex, normalize_to_lower=normalize_to_lower):
112
+ prediction = asr(batch["audio"]["array"], chunk_length_s=args.chunk_length_s, stride_length_s=args.stride_length_s)
113
+
114
+ batch["prediction"] = prediction["text"]
115
+ batch["target"] = normalize_text(batch["sentence"], invalid_chars_regex, normalize_to_lower)
116
+ return batch
117
+
118
+ # run inference on all examples
119
+ result = dataset.map(map_to_pred, remove_columns=dataset.column_names)
120
+
121
+ # filtering out empty targets
122
+ result = result.filter(lambda example: example["target"] != "")
123
+
124
+ # compute and log_results
125
+ # do not change function below
126
+ log_results(result, args)
127
+
128
+
129
+ if __name__ == "__main__":
130
+ parser = argparse.ArgumentParser()
131
+
132
+ parser.add_argument(
133
+ "--model_id", type=str, required=True, help="Model identifier. Should be loadable with 🤗 Transformers"
134
+ )
135
+ parser.add_argument(
136
+ "--dataset", type=str, required=True, help="Dataset name to evaluate the `model_id`. Should be loadable with 🤗 Datasets"
137
+ )
138
+ parser.add_argument(
139
+ "--config", type=str, required=True, help="Config of the dataset. *E.g.* `'en'` for Common Voice"
140
+ )
141
+ parser.add_argument(
142
+ "--split", type=str, required=True, help="Split of the dataset. *E.g.* `'test'`"
143
+ )
144
+ parser.add_argument(
145
+ "--chunk_length_s", type=float, default=None, help="Chunk length in seconds. Defaults to None. For long audio files a good value would be 5.0 seconds."
146
+ )
147
+ parser.add_argument(
148
+ "--stride_length_s", type=float, default=None, help="Stride of the audio chunks. Defaults to None. For long audio files a good value would be 1.0 seconds."
149
+ )
150
+ parser.add_argument(
151
+ "--log_outputs", action='store_true', help="If defined, write outputs to log file for analysis."
152
+ )
153
+ parser.add_argument(
154
+ "--greedy", action='store_true', help="If defined, the LM will be ignored during inference."
155
+ )
156
+ parser.add_argument(
157
+ "--device",
158
+ type=int,
159
+ default=None,
160
+ help="The device to run the pipeline on. -1 for CPU (default), 0 for the first GPU and so on.",
161
+ )
162
+ args = parser.parse_args()
163
+
164
+ main(args)
Wan2.2-S2V-14B/wav2vec2-large-xlsr-53-english/full_eval.sh ADDED
@@ -0,0 +1,15 @@
1
+ # CV - TEST
2
+
3
+ python eval.py --model_id jonatasgrosman/wav2vec2-large-xlsr-53-english --dataset mozilla-foundation/common_voice_6_0 --config en --split test --log_outputs --greedy
4
+ mv log_mozilla-foundation_common_voice_6_0_en_test_predictions.txt log_mozilla-foundation_common_voice_6_0_en_test_predictions_greedy.txt
5
+ mv mozilla-foundation_common_voice_6_0_en_test_eval_results.txt mozilla-foundation_common_voice_6_0_en_test_eval_results_greedy.txt
6
+
7
+ python eval.py --model_id jonatasgrosman/wav2vec2-large-xlsr-53-english --dataset mozilla-foundation/common_voice_6_0 --config en --split test --log_outputs
8
+
9
+ # HF EVENT - DEV
10
+
11
+ python eval.py --model_id jonatasgrosman/wav2vec2-large-xlsr-53-english --dataset speech-recognition-community-v2/dev_data --config en --split validation --chunk_length_s 5.0 --stride_length_s 1.0 --log_outputs --greedy
12
+ mv log_speech-recognition-community-v2_dev_data_en_validation_predictions.txt log_speech-recognition-community-v2_dev_data_en_validation_predictions_greedy.txt
13
+ mv speech-recognition-community-v2_dev_data_en_validation_eval_results.txt speech-recognition-community-v2_dev_data_en_validation_eval_results_greedy.txt
14
+
15
+ python eval.py --model_id jonatasgrosman/wav2vec2-large-xlsr-53-english --dataset speech-recognition-community-v2/dev_data --config en --split validation --chunk_length_s 5.0 --stride_length_s 1.0 --log_outputs
Wan2.2-S2V-14B/wav2vec2-large-xlsr-53-english/language_model/attrs.json ADDED
@@ -0,0 +1 @@
1
+ {"alpha": 0.5, "beta": 1.5, "unk_score_offset": -10.0, "score_boundary": true}
Wan2.2-S2V-14B/wav2vec2-large-xlsr-53-english/language_model/unigrams.txt ADDED
The diff for this file is too large to render. See raw diff
 
Wan2.2-S2V-14B/wav2vec2-large-xlsr-53-english/log_mozilla-foundation_common_voice_6_0_en_test_predictions.txt ADDED
The diff for this file is too large to render. See raw diff
 
Wan2.2-S2V-14B/wav2vec2-large-xlsr-53-english/log_mozilla-foundation_common_voice_6_0_en_test_predictions_greedy.txt ADDED
The diff for this file is too large to render. See raw diff
 
Wan2.2-S2V-14B/wav2vec2-large-xlsr-53-english/log_mozilla-foundation_common_voice_6_0_en_test_targets.txt ADDED
The diff for this file is too large to render. See raw diff
 
Wan2.2-S2V-14B/wav2vec2-large-xlsr-53-english/log_speech-recognition-community-v2_dev_data_en_validation_predictions.txt ADDED
The diff for this file is too large to render. See raw diff
 
Wan2.2-S2V-14B/wav2vec2-large-xlsr-53-english/log_speech-recognition-community-v2_dev_data_en_validation_predictions_greedy.txt ADDED
The diff for this file is too large to render. See raw diff
 
Wan2.2-S2V-14B/wav2vec2-large-xlsr-53-english/log_speech-recognition-community-v2_dev_data_en_validation_targets.txt ADDED
The diff for this file is too large to render. See raw diff
 
Wan2.2-S2V-14B/wav2vec2-large-xlsr-53-english/mozilla-foundation_common_voice_6_0_en_test_eval_results.txt ADDED
@@ -0,0 +1,2 @@
1
+ WER: 0.1481828839390387
2
+ CER: 0.06848087313203592
Wan2.2-S2V-14B/wav2vec2-large-xlsr-53-english/mozilla-foundation_common_voice_6_0_en_test_eval_results_greedy.txt ADDED
@@ -0,0 +1,2 @@
1
+ WER: 0.19067492882264278
2
+ CER: 0.07694957927516068
Wan2.2-S2V-14B/wav2vec2-large-xlsr-53-english/preprocessor_config.json ADDED
@@ -0,0 +1,10 @@
 
1
+ {
2
+ "do_normalize": true,
3
+ "feature_extractor_type": "Wav2Vec2FeatureExtractor",
4
+ "feature_size": 1,
5
+ "padding_side": "right",
6
+ "padding_value": 0.0,
7
+ "return_attention_mask": true,
8
+ "sampling_rate": 16000,
9
+ "processor_class": "Wav2Vec2ProcessorWithLM"
10
+ }
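A minimal sketch of what this preprocessor config implies at runtime (the silent one-second clip is just a stand-in input):

```python
# Minimal sketch: the feature extractor expects 16 kHz mono float audio and
# returns normalized input values plus an attention mask.
import numpy as np
from transformers import Wav2Vec2FeatureExtractor

feature_extractor = Wav2Vec2FeatureExtractor(
    feature_size=1, sampling_rate=16000, padding_value=0.0,
    do_normalize=True, return_attention_mask=True)
audio = np.zeros(16000, dtype=np.float32)  # one second of silence as a stand-in
inputs = feature_extractor(audio, sampling_rate=16000, return_tensors="pt")
print(inputs.input_values.shape, inputs.attention_mask.shape)
```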
Wan2.2-S2V-14B/wav2vec2-large-xlsr-53-english/special_tokens_map.json ADDED
@@ -0,0 +1 @@
1
+ {"bos_token": "<s>", "eos_token": "</s>", "unk_token": "<unk>", "pad_token": "<pad>"}
Wan2.2-S2V-14B/wav2vec2-large-xlsr-53-english/speech-recognition-community-v2_dev_data_en_validation_eval_results.txt ADDED
@@ -0,0 +1,2 @@
1
+ WER: 0.2085057090848916
2
+ CER: 0.11011805154105943
Wan2.2-S2V-14B/wav2vec2-large-xlsr-53-english/speech-recognition-community-v2_dev_data_en_validation_eval_results_greedy.txt ADDED
@@ -0,0 +1,2 @@
1
+ WER: 0.27722157868608305
2
+ CER: 0.11652265190008215
Wan2.2-S2V-14B/wav2vec2-large-xlsr-53-english/vocab.json ADDED
@@ -0,0 +1 @@
1
+ {"<pad>": 0, "<s>": 1, "</s>": 2, "<unk>": 3, "|": 4, "'": 5, "-": 6, "a": 7, "b": 8, "c": 9, "d": 10, "e": 11, "f": 12, "g": 13, "h": 14, "i": 15, "j": 16, "k": 17, "l": 18, "m": 19, "n": 20, "o": 21, "p": 22, "q": 23, "r": 24, "s": 25, "t": 26, "u": 27, "v": 28, "w": 29, "x": 30, "y": 31, "z": 32}
Wan2.2/.gitignore ADDED
@@ -0,0 +1,7 @@
1
+ __pycache__/
2
+ .DS_Store
3
+ .vscode*
4
+ tmp_examples*
5
+ new_checkpoint*
6
+ batch_test*
7
+ nohup*
Wan2.2/INSTALL.md ADDED
@@ -0,0 +1,55 @@
1
+ # Installation Guide
2
+
3
+ ## Install with pip
4
+
5
+ ```bash
6
+ pip install .
7
+ pip install .[dev] # Also installs the dev tools
8
+ ```
9
+
10
+ ## Install with Poetry
11
+
12
+ Ensure you have [Poetry](https://python-poetry.org/docs/#installation) installed on your system.
13
+
14
+ To install all dependencies:
15
+
16
+ ```bash
17
+ poetry install
18
+ ```
19
+
20
+ ### Handling `flash-attn` Installation Issues
21
+
22
+ If `flash-attn` fails due to **PEP 517 build issues**, you can try one of the following fixes.
23
+
24
+ #### No-Build-Isolation Installation (Recommended)
25
+ ```bash
26
+ poetry run pip install --upgrade pip setuptools wheel
27
+ poetry run pip install flash-attn --no-build-isolation
28
+ poetry install
29
+ ```
30
+
31
+ #### Install from Git (Alternative)
32
+ ```bash
33
+ poetry run pip install git+https://github.com/Dao-AILab/flash-attention.git
34
+ ```
35
+
36
+ ---
37
+
38
+ ### Running the Model
39
+
40
+ Once the installation is complete, you can run **Wan2.2** using:
41
+
42
+ ```bash
43
+ poetry run python generate.py --task t2v-A14B --size '1280*720' --ckpt_dir ./Wan2.2-T2V-A14B --prompt "Two anthropomorphic cats in comfy boxing gear and bright gloves fight intensely on a spotlighted stage."
44
+ ```
45
+
46
+ #### Test
47
+ ```bash
48
+ bash tests/test.sh
49
+ ```
50
+
51
+ #### Format
52
+ ```bash
53
+ black .
54
+ isort .
55
+ ```
Wan2.2/Makefile ADDED
@@ -0,0 +1,5 @@
1
+ .PHONY: format
2
+
3
+ format:
4
+ isort generate.py wan
5
+ yapf -i -r *.py generate.py wan
Wan2.2/generate.py ADDED
@@ -0,0 +1,575 @@
1
+ # Copyright 2024-2025 The Alibaba Wan Team Authors. All rights reserved.
2
+ import argparse
3
+ import logging
4
+ import os
5
+ import sys
6
+ import warnings
7
+ from datetime import datetime
8
+
9
+ warnings.filterwarnings('ignore')
10
+
11
+ import random
12
+
13
+ import torch
14
+ import torch.distributed as dist
15
+ from PIL import Image
16
+
17
+ import wan
18
+ from wan.configs import MAX_AREA_CONFIGS, SIZE_CONFIGS, SUPPORTED_SIZES, WAN_CONFIGS
19
+ from wan.distributed.util import init_distributed_group
20
+ from wan.utils.prompt_extend import DashScopePromptExpander, QwenPromptExpander
21
+ from wan.utils.utils import merge_video_audio, save_video, str2bool
22
+
23
+
24
+ EXAMPLE_PROMPT = {
25
+ "t2v-A14B": {
26
+ "prompt":
27
+ "Two anthropomorphic cats in comfy boxing gear and bright gloves fight intensely on a spotlighted stage.",
28
+ },
29
+ "i2v-A14B": {
30
+ "prompt":
31
+ "Summer beach vacation style, a white cat wearing sunglasses sits on a surfboard. The fluffy-furred feline gazes directly at the camera with a relaxed expression. Blurred beach scenery forms the background featuring crystal-clear waters, distant green hills, and a blue sky dotted with white clouds. The cat assumes a naturally relaxed posture, as if savoring the sea breeze and warm sunlight. A close-up shot highlights the feline's intricate details and the refreshing atmosphere of the seaside.",
32
+ "image":
33
+ "examples/i2v_input.JPG",
34
+ },
35
+ "ti2v-5B": {
36
+ "prompt":
37
+ "Two anthropomorphic cats in comfy boxing gear and bright gloves fight intensely on a spotlighted stage.",
38
+ },
39
+ "animate-14B": {
40
+ "prompt": "视频中的人在做动作",
41
+ "video": "",
42
+ "pose": "",
43
+ "mask": "",
44
+ },
45
+ "s2v-14B": {
46
+ "prompt":
47
+ "Summer beach vacation style, a white cat wearing sunglasses sits on a surfboard. The fluffy-furred feline gazes directly at the camera with a relaxed expression. Blurred beach scenery forms the background featuring crystal-clear waters, distant green hills, and a blue sky dotted with white clouds. The cat assumes a naturally relaxed posture, as if savoring the sea breeze and warm sunlight. A close-up shot highlights the feline's intricate details and the refreshing atmosphere of the seaside.",
48
+ "image":
49
+ "examples/i2v_input.JPG",
50
+ "audio":
51
+ "examples/talk.wav",
52
+ "tts_prompt_audio":
53
+ "examples/zero_shot_prompt.wav",
54
+ "tts_prompt_text":
55
+ "希望你以后能够做的比我还好呦。",
56
+ "tts_text":
57
+ "收到好友从远方寄来的生日礼物,那份意外的惊喜与深深的祝福让我心中充满了甜蜜的快乐,笑容如花儿般绽放。"
58
+ },
59
+ }
60
+
61
+
62
+ def _validate_args(args):
63
+ # Basic check
64
+ assert args.ckpt_dir is not None, "Please specify the checkpoint directory."
65
+ assert args.task in WAN_CONFIGS, f"Unsupported task: {args.task}"
66
+ assert args.task in EXAMPLE_PROMPT, f"Unsupported task: {args.task}"
67
+
68
+ if args.prompt is None:
69
+ args.prompt = EXAMPLE_PROMPT[args.task]["prompt"]
70
+ if args.image is None and "image" in EXAMPLE_PROMPT[args.task]:
71
+ args.image = EXAMPLE_PROMPT[args.task]["image"]
72
+ if args.audio is None and args.enable_tts is False and "audio" in EXAMPLE_PROMPT[args.task]:
73
+ args.audio = EXAMPLE_PROMPT[args.task]["audio"]
74
+ if (args.tts_prompt_audio is None or args.tts_text is None) and args.enable_tts is True and "audio" in EXAMPLE_PROMPT[args.task]:
75
+ args.tts_prompt_audio = EXAMPLE_PROMPT[args.task]["tts_prompt_audio"]
76
+ args.tts_prompt_text = EXAMPLE_PROMPT[args.task]["tts_prompt_text"]
77
+ args.tts_text = EXAMPLE_PROMPT[args.task]["tts_text"]
78
+
79
+ if args.task == "i2v-A14B":
80
+ assert args.image is not None, "Please specify the image path for i2v."
81
+
82
+ cfg = WAN_CONFIGS[args.task]
83
+
84
+ if args.sample_steps is None:
85
+ args.sample_steps = cfg.sample_steps
86
+
87
+ if args.sample_shift is None:
88
+ args.sample_shift = cfg.sample_shift
89
+
90
+ if args.sample_guide_scale is None:
91
+ args.sample_guide_scale = cfg.sample_guide_scale
92
+
93
+ if args.frame_num is None:
94
+ args.frame_num = cfg.frame_num
95
+
96
+ args.base_seed = args.base_seed if args.base_seed >= 0 else random.randint(
97
+ 0, sys.maxsize)
98
+ # Size check
99
+ if not 's2v' in args.task:
100
+ assert args.size in SUPPORTED_SIZES[
101
+ args.
102
+ task], f"Unsupport size {args.size} for task {args.task}, supported sizes are: {', '.join(SUPPORTED_SIZES[args.task])}"
103
+
104
+
105
+ def _parse_args():
106
+ parser = argparse.ArgumentParser(
107
+ description="Generate a image or video from a text prompt or image using Wan"
108
+ )
109
+ parser.add_argument(
110
+ "--task",
111
+ type=str,
112
+ default="t2v-A14B",
113
+ choices=list(WAN_CONFIGS.keys()),
114
+ help="The task to run.")
115
+ parser.add_argument(
116
+ "--size",
117
+ type=str,
118
+ default="1280*720",
119
+ choices=list(SIZE_CONFIGS.keys()),
120
+ help="The area (width*height) of the generated video. For the I2V task, the aspect ratio of the output video will follow that of the input image."
121
+ )
122
+ parser.add_argument(
123
+ "--frame_num",
124
+ type=int,
125
+ default=None,
126
+ help="How many frames of video are generated. The number should be 4n+1"
127
+ )
128
+ parser.add_argument(
129
+ "--ckpt_dir",
130
+ type=str,
131
+ default=None,
132
+ help="The path to the checkpoint directory.")
133
+ parser.add_argument(
134
+ "--offload_model",
135
+ type=str2bool,
136
+ default=None,
137
+ help="Whether to offload the model to CPU after each model forward, reducing GPU memory usage."
138
+ )
139
+ parser.add_argument(
140
+ "--ulysses_size",
141
+ type=int,
142
+ default=1,
143
+ help="The size of the ulysses parallelism in DiT.")
144
+ parser.add_argument(
145
+ "--t5_fsdp",
146
+ action="store_true",
147
+ default=False,
148
+ help="Whether to use FSDP for T5.")
149
+ parser.add_argument(
150
+ "--t5_cpu",
151
+ action="store_true",
152
+ default=False,
153
+ help="Whether to place T5 model on CPU.")
154
+ parser.add_argument(
155
+ "--dit_fsdp",
156
+ action="store_true",
157
+ default=False,
158
+ help="Whether to use FSDP for DiT.")
159
+ parser.add_argument(
160
+ "--save_file",
161
+ type=str,
162
+ default=None,
163
+ help="The file to save the generated video to.")
164
+ parser.add_argument(
165
+ "--prompt",
166
+ type=str,
167
+ default=None,
168
+ help="The prompt to generate the video from.")
169
+ parser.add_argument(
170
+ "--use_prompt_extend",
171
+ action="store_true",
172
+ default=False,
173
+ help="Whether to use prompt extend.")
174
+ parser.add_argument(
175
+ "--prompt_extend_method",
176
+ type=str,
177
+ default="local_qwen",
178
+ choices=["dashscope", "local_qwen"],
179
+ help="The prompt extend method to use.")
180
+ parser.add_argument(
181
+ "--prompt_extend_model",
182
+ type=str,
183
+ default=None,
184
+ help="The prompt extend model to use.")
185
+ parser.add_argument(
186
+ "--prompt_extend_target_lang",
187
+ type=str,
188
+ default="zh",
189
+ choices=["zh", "en"],
190
+ help="The target language of prompt extend.")
191
+ parser.add_argument(
192
+ "--base_seed",
193
+ type=int,
194
+ default=-1,
195
+ help="The seed to use for generating the video.")
196
+ parser.add_argument(
197
+ "--image",
198
+ type=str,
199
+ default=None,
200
+ help="The image to generate the video from.")
201
+ parser.add_argument(
202
+ "--sample_solver",
203
+ type=str,
204
+ default='unipc',
205
+ choices=['unipc', 'dpm++'],
206
+ help="The solver used to sample.")
207
+ parser.add_argument(
208
+ "--sample_steps", type=int, default=None, help="The sampling steps.")
209
+ parser.add_argument(
210
+ "--sample_shift",
211
+ type=float,
212
+ default=None,
213
+ help="Sampling shift factor for flow matching schedulers.")
214
+ parser.add_argument(
215
+ "--sample_guide_scale",
216
+ type=float,
217
+ default=None,
218
+ help="Classifier free guidance scale.")
219
+ parser.add_argument(
220
+ "--convert_model_dtype",
221
+ action="store_true",
222
+ default=False,
223
+ help="Whether to convert model paramerters dtype.")
224
+
225
+ # animate
226
+ parser.add_argument(
227
+ "--src_root_path",
228
+ type=str,
229
+ default=None,
230
+ help="The file of the process output path. Default None.")
231
+ parser.add_argument(
232
+ "--refert_num",
233
+ type=int,
234
+ default=1,
235
+ help="How many frames are used for temporal guidance. Must be 1 or 5."
236
+ )
237
+ parser.add_argument(
238
+ "--replace_flag",
239
+ action="store_true",
240
+ default=False,
241
+ help="Whether to use replace.")
242
+ parser.add_argument(
243
+ "--use_relighting_lora",
244
+ action="store_true",
245
+ default=False,
246
+ help="Whether to use relighting lora.")
247
+
248
+ # the following args only work for s2v
249
+ parser.add_argument(
250
+ "--num_clip",
251
+ type=int,
252
+ default=None,
253
+ help="Number of video clips to generate, the whole video will not exceed the length of audio."
254
+ )
255
+ parser.add_argument(
256
+ "--audio",
257
+ type=str,
258
+ default=None,
259
+ help="Path to the audio file, e.g. wav, mp3")
260
+ parser.add_argument(
261
+ "--enable_tts",
262
+ action="store_true",
263
+ default=False,
264
+ help="Use CosyVoice to synthesis audio")
265
+ parser.add_argument(
266
+ "--tts_prompt_audio",
267
+ type=str,
268
+ default=None,
269
+ help="Path to the tts prompt audio file, e.g. wav, mp3. Must be greater than 16khz, and between 5s to 15s.")
270
+ parser.add_argument(
271
+ "--tts_prompt_text",
272
+ type=str,
273
+ default=None,
274
+ help="Content to the tts prompt audio. If provided, must exactly match tts_prompt_audio")
275
+ parser.add_argument(
276
+ "--tts_text",
277
+ type=str,
278
+ default=None,
279
+ help="Text wish to synthesize")
280
+ parser.add_argument(
281
+ "--pose_video",
282
+ type=str,
283
+ default=None,
284
+ help="Provide Dw-pose sequence to do Pose Driven")
285
+ parser.add_argument(
286
+ "--start_from_ref",
287
+ action="store_true",
288
+ default=False,
289
+ help="whether set the reference image as the starting point for generation"
290
+ )
291
+ parser.add_argument(
292
+ "--infer_frames",
293
+ type=int,
294
+ default=80,
295
+ help="Number of frames per clip, 48 or 80 or others (must be multiple of 4) for 14B s2v"
296
+ )
297
+ args = parser.parse_args()
298
+ _validate_args(args)
299
+
300
+ return args
301
+
302
+
303
+ def _init_logging(rank):
304
+ # logging
305
+ if rank == 0:
306
+ # set format
307
+ logging.basicConfig(
308
+ level=logging.INFO,
309
+ format="[%(asctime)s] %(levelname)s: %(message)s",
310
+ handlers=[logging.StreamHandler(stream=sys.stdout)])
311
+ else:
312
+ logging.basicConfig(level=logging.ERROR)
313
+
314
+
315
+ def generate(args):
316
+ rank = int(os.getenv("RANK", 0))
317
+ world_size = int(os.getenv("WORLD_SIZE", 1))
318
+ local_rank = int(os.getenv("LOCAL_RANK", 0))
319
+ device = local_rank
320
+ _init_logging(rank)
321
+
322
+ if args.offload_model is None:
323
+ args.offload_model = False if world_size > 1 else True
324
+ logging.info(
325
+ f"offload_model is not specified, set to {args.offload_model}.")
326
+ if world_size > 1:
327
+ torch.cuda.set_device(local_rank)
328
+ dist.init_process_group(
329
+ backend="nccl",
330
+ init_method="env://",
331
+ rank=rank,
332
+ world_size=world_size)
333
+ else:
334
+ assert not (
335
+ args.t5_fsdp or args.dit_fsdp
336
+ ), f"t5_fsdp and dit_fsdp are not supported in non-distributed environments."
337
+ assert not (
338
+ args.ulysses_size > 1
339
+ ), f"sequence parallel are not supported in non-distributed environments."
340
+
341
+ if args.ulysses_size > 1:
342
+ assert args.ulysses_size == world_size, "ulysses_size must be equal to the world size."
343
+ init_distributed_group()
344
+
345
+ if args.use_prompt_extend:
346
+ if args.prompt_extend_method == "dashscope":
347
+ prompt_expander = DashScopePromptExpander(
348
+ model_name=args.prompt_extend_model,
349
+ task=args.task,
350
+ is_vl=args.image is not None)
351
+ elif args.prompt_extend_method == "local_qwen":
352
+ prompt_expander = QwenPromptExpander(
353
+ model_name=args.prompt_extend_model,
354
+ task=args.task,
355
+ is_vl=args.image is not None,
356
+ device=rank)
357
+ else:
358
+ raise NotImplementedError(
359
+ f"Unsupport prompt_extend_method: {args.prompt_extend_method}")
360
+
361
+ cfg = WAN_CONFIGS[args.task]
362
+ if args.ulysses_size > 1:
363
+ assert cfg.num_heads % args.ulysses_size == 0, f"`{cfg.num_heads=}` cannot be divided evenly by `{args.ulysses_size=}`."
364
+
365
+ logging.info(f"Generation job args: {args}")
366
+ logging.info(f"Generation model config: {cfg}")
367
+
368
+ if dist.is_initialized():
369
+ base_seed = [args.base_seed] if rank == 0 else [None]
370
+ dist.broadcast_object_list(base_seed, src=0)
371
+ args.base_seed = base_seed[0]
372
+
373
+ logging.info(f"Input prompt: {args.prompt}")
374
+ img = None
375
+ if args.image is not None:
376
+ img = Image.open(args.image).convert("RGB")
377
+ logging.info(f"Input image: {args.image}")
378
+
379
+ # prompt extend
380
+ if args.use_prompt_extend:
381
+ logging.info("Extending prompt ...")
382
+ if rank == 0:
383
+ prompt_output = prompt_expander(
384
+ args.prompt,
385
+ image=img,
386
+ tar_lang=args.prompt_extend_target_lang,
387
+ seed=args.base_seed)
388
+ if prompt_output.status == False:
389
+ logging.info(
390
+ f"Extending prompt failed: {prompt_output.message}")
391
+ logging.info("Falling back to original prompt.")
392
+ input_prompt = args.prompt
393
+ else:
394
+ input_prompt = prompt_output.prompt
395
+ input_prompt = [input_prompt]
396
+ else:
397
+ input_prompt = [None]
398
+ if dist.is_initialized():
399
+ dist.broadcast_object_list(input_prompt, src=0)
400
+ args.prompt = input_prompt[0]
401
+ logging.info(f"Extended prompt: {args.prompt}")
402
+
403
+ if "t2v" in args.task:
404
+ logging.info("Creating WanT2V pipeline.")
405
+ wan_t2v = wan.WanT2V(
406
+ config=cfg,
407
+ checkpoint_dir=args.ckpt_dir,
408
+ device_id=device,
409
+ rank=rank,
410
+ t5_fsdp=args.t5_fsdp,
411
+ dit_fsdp=args.dit_fsdp,
412
+ use_sp=(args.ulysses_size > 1),
413
+ t5_cpu=args.t5_cpu,
414
+ convert_model_dtype=args.convert_model_dtype,
415
+ )
416
+
417
+ logging.info(f"Generating video ...")
418
+ video = wan_t2v.generate(
419
+ args.prompt,
420
+ size=SIZE_CONFIGS[args.size],
421
+ frame_num=args.frame_num,
422
+ shift=args.sample_shift,
423
+ sample_solver=args.sample_solver,
424
+ sampling_steps=args.sample_steps,
425
+ guide_scale=args.sample_guide_scale,
426
+ seed=args.base_seed,
427
+ offload_model=args.offload_model)
428
+ elif "ti2v" in args.task:
429
+ logging.info("Creating WanTI2V pipeline.")
430
+ wan_ti2v = wan.WanTI2V(
431
+ config=cfg,
432
+ checkpoint_dir=args.ckpt_dir,
433
+ device_id=device,
434
+ rank=rank,
435
+ t5_fsdp=args.t5_fsdp,
436
+ dit_fsdp=args.dit_fsdp,
437
+ use_sp=(args.ulysses_size > 1),
438
+ t5_cpu=args.t5_cpu,
439
+ convert_model_dtype=args.convert_model_dtype,
440
+ )
441
+
442
+ logging.info(f"Generating video ...")
443
+ video = wan_ti2v.generate(
444
+ args.prompt,
445
+ img=img,
446
+ size=SIZE_CONFIGS[args.size],
447
+ max_area=MAX_AREA_CONFIGS[args.size],
448
+ frame_num=args.frame_num,
449
+ shift=args.sample_shift,
450
+ sample_solver=args.sample_solver,
451
+ sampling_steps=args.sample_steps,
452
+ guide_scale=args.sample_guide_scale,
453
+ seed=args.base_seed,
454
+ offload_model=args.offload_model)
455
+ elif "animate" in args.task:
456
+ logging.info("Creating Wan-Animate pipeline.")
457
+ wan_animate = wan.WanAnimate(
458
+ config=cfg,
459
+ checkpoint_dir=args.ckpt_dir,
460
+ device_id=device,
461
+ rank=rank,
462
+ t5_fsdp=args.t5_fsdp,
463
+ dit_fsdp=args.dit_fsdp,
464
+ use_sp=(args.ulysses_size > 1),
465
+ t5_cpu=args.t5_cpu,
466
+ convert_model_dtype=args.convert_model_dtype,
467
+ use_relighting_lora=args.use_relighting_lora
468
+ )
469
+
470
+ logging.info(f"Generating video ...")
471
+ video = wan_animate.generate(
472
+ src_root_path=args.src_root_path,
473
+ replace_flag=args.replace_flag,
474
+ refert_num = args.refert_num,
475
+ clip_len=args.frame_num,
476
+ shift=args.sample_shift,
477
+ sample_solver=args.sample_solver,
478
+ sampling_steps=args.sample_steps,
479
+ guide_scale=args.sample_guide_scale,
480
+ seed=args.base_seed,
481
+ offload_model=args.offload_model)
482
+ elif "s2v" in args.task:
483
+ logging.info("Creating WanS2V pipeline.")
484
+ wan_s2v = wan.WanS2V(
485
+ config=cfg,
486
+ checkpoint_dir=args.ckpt_dir,
487
+ device_id=device,
488
+ rank=rank,
489
+ t5_fsdp=args.t5_fsdp,
490
+ dit_fsdp=args.dit_fsdp,
491
+ use_sp=(args.ulysses_size > 1),
492
+ t5_cpu=args.t5_cpu,
493
+ convert_model_dtype=args.convert_model_dtype,
494
+ )
495
+ logging.info(f"Generating video ...")
496
+ video = wan_s2v.generate(
497
+ input_prompt=args.prompt,
498
+ ref_image_path=args.image,
499
+ audio_path=args.audio,
500
+ enable_tts=args.enable_tts,
501
+ tts_prompt_audio=args.tts_prompt_audio,
502
+ tts_prompt_text=args.tts_prompt_text,
503
+ tts_text=args.tts_text,
504
+ num_repeat=args.num_clip,
505
+ pose_video=args.pose_video,
506
+ max_area=MAX_AREA_CONFIGS[args.size],
507
+ infer_frames=args.infer_frames,
508
+ shift=args.sample_shift,
509
+ sample_solver=args.sample_solver,
510
+ sampling_steps=args.sample_steps,
511
+ guide_scale=args.sample_guide_scale,
512
+ seed=args.base_seed,
513
+ offload_model=args.offload_model,
514
+ init_first_frame=args.start_from_ref,
515
+ )
516
+ else:
517
+ logging.info("Creating WanI2V pipeline.")
518
+ wan_i2v = wan.WanI2V(
519
+ config=cfg,
520
+ checkpoint_dir=args.ckpt_dir,
521
+ device_id=device,
522
+ rank=rank,
523
+ t5_fsdp=args.t5_fsdp,
524
+ dit_fsdp=args.dit_fsdp,
525
+ use_sp=(args.ulysses_size > 1),
526
+ t5_cpu=args.t5_cpu,
527
+ convert_model_dtype=args.convert_model_dtype,
528
+ )
529
+ logging.info("Generating video ...")
530
+ video = wan_i2v.generate(
531
+ args.prompt,
532
+ img,
533
+ max_area=MAX_AREA_CONFIGS[args.size],
534
+ frame_num=args.frame_num,
535
+ shift=args.sample_shift,
536
+ sample_solver=args.sample_solver,
537
+ sampling_steps=args.sample_steps,
538
+ guide_scale=args.sample_guide_scale,
539
+ seed=args.base_seed,
540
+ offload_model=args.offload_model)
541
+
542
+ if rank == 0:
543
+ if args.save_file is None:
544
+ formatted_time = datetime.now().strftime("%Y%m%d_%H%M%S")
545
+ formatted_prompt = args.prompt.replace(" ", "_").replace("/",
546
+ "_")[:50]
547
+ suffix = '.mp4'
548
+ args.save_file = f"{args.task}_{args.size.replace('*','x') if sys.platform=='win32' else args.size}_{args.ulysses_size}_{formatted_prompt}_{formatted_time}" + suffix
549
+
550
+ logging.info(f"Saving generated video to {args.save_file}")
551
+ save_video(
552
+ tensor=video[None],
553
+ save_file=args.save_file,
554
+ fps=cfg.sample_fps,
555
+ nrow=1,
556
+ normalize=True,
557
+ value_range=(-1, 1))
558
+ if "s2v" in args.task:
559
+ if args.enable_tts is False:
560
+ merge_video_audio(video_path=args.save_file, audio_path=args.audio)
561
+ else:
562
+ merge_video_audio(video_path=args.save_file, audio_path="tts.wav")
563
+ del video
564
+
565
+ torch.cuda.synchronize()
566
+ if dist.is_initialized():
567
+ dist.barrier()
568
+ dist.destroy_process_group()
569
+
570
+ logging.info("Finished.")
571
+
572
+
573
+ if __name__ == "__main__":
574
+ args = _parse_args()
575
+ generate(args)
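For reference, a minimal single-GPU invocation of this script for the s2v-14B task could look like the sketch below. The checkpoint directory is a placeholder, and the image and audio files are the bundled examples already referenced in EXAMPLE_PROMPT above.

```bash
# Hypothetical checkpoint location; point --ckpt_dir at wherever Wan2.2-S2V-14B was downloaded.
python generate.py \
  --task s2v-14B \
  --size 1024*704 \
  --ckpt_dir ./Wan2.2-S2V-14B \
  --image examples/i2v_input.JPG \
  --audio examples/talk.wav \
  --offload_model True
```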
Wan2.2/pyproject.toml ADDED
@@ -0,0 +1,66 @@
1
+ [build-system]
2
+ requires = ["setuptools>=61.0"]
3
+ build-backend = "setuptools.build_meta"
4
+
5
+ [project]
6
+ name = "wan"
7
+ version = "2.2.0"
8
+ description = "Wan: Open and Advanced Large-Scale Video Generative Models"
9
+ authors = [
10
+ { name = "Wan Team", email = "wan.ai@alibabacloud.com" }
11
+ ]
12
+ license = { file = "LICENSE.txt" }
13
+ readme = "README.md"
14
+ requires-python = ">=3.10,<4.0"
15
+ dependencies = [
16
+ "torch>=2.4.0",
17
+ "torchvision>=0.19.0",
18
+ "opencv-python>=4.9.0.80",
19
+ "diffusers>=0.31.0",
20
+ "transformers>=4.49.0",
21
+ "tokenizers>=0.20.3",
22
+ "accelerate>=1.1.1",
23
+ "tqdm",
24
+ "imageio",
25
+ "easydict",
26
+ "ftfy",
27
+ "dashscope",
28
+ "imageio-ffmpeg",
29
+ "flash_attn",
30
+ "numpy>=1.23.5,<2"
31
+ ]
32
+
33
+ [project.optional-dependencies]
34
+ dev = [
35
+ "pytest",
36
+ "black",
37
+ "flake8",
38
+ "isort",
39
+ "mypy",
40
+ "huggingface-hub[cli]"
41
+ ]
42
+
43
+ [project.urls]
44
+ homepage = "https://wanxai.com"
45
+ documentation = "https://github.com/Wan-Video/Wan2.2"
46
+ repository = "https://github.com/Wan-Video/Wan2.2"
47
+ huggingface = "https://huggingface.co/Wan-AI/"
48
+ modelscope = "https://modelscope.cn/organization/Wan-AI"
49
+ discord = "https://discord.gg/p5XbdQV7"
50
+
51
+ [tool.setuptools]
52
+ packages = ["wan"]
53
+
54
+ [tool.setuptools.package-data]
55
+ "wan" = ["**/*.py"]
56
+
57
+ [tool.black]
58
+ line-length = 88
59
+
60
+ [tool.isort]
61
+ profile = "black"
62
+
63
+ [tool.mypy]
64
+ strict = true
65
+
66
+
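One way to consume this project file (not something the commit itself prescribes) is an editable install from the repository root; the dev extra pulls in the tooling listed under [project.optional-dependencies].

```bash
# Editable install of the "wan" package defined above. Note that flash_attn usually
# needs a CUDA toolchain to build, so a GPU-ready environment is assumed here.
pip install -e .
# Optionally add the development tools (pytest, black, flake8, isort, mypy, huggingface-hub[cli]):
pip install -e ".[dev]"
```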
Wan2.2/requirements.txt ADDED
@@ -0,0 +1,16 @@
1
+ torch>=2.4.0
2
+ torchvision>=0.19.0
3
+ torchaudio
4
+ opencv-python>=4.9.0.80
5
+ diffusers>=0.31.0
6
+ transformers>=4.49.0,<=4.51.3
7
+ tokenizers>=0.20.3
8
+ accelerate>=1.1.1
9
+ tqdm
10
+ imageio[ffmpeg]
11
+ easydict
12
+ ftfy
13
+ dashscope
14
+ imageio-ffmpeg
15
+ flash_attn
16
+ numpy>=1.23.5,<2
Wan2.2/requirements_animate.txt ADDED
@@ -0,0 +1,8 @@
1
+ decord
2
+ peft
3
+ onnxruntime
4
+ pandas
5
+ matplotlib
6
+ -e git+https://github.com/facebookresearch/sam2.git@0e78a118995e66bb27d78518c4bd9a3e95b4e266#egg=SAM-2
7
+ loguru
8
+ sentencepiece
Wan2.2/requirements_s2v.txt ADDED
@@ -0,0 +1,19 @@
1
+ openai-whisper
2
+ HyperPyYAML
3
+ onnxruntime
4
+ inflect
5
+ wetext
6
+ omegaconf
7
+ conformer
8
+ hydra-core
9
+ lightning
10
+ rich
11
+ gdown
12
+ matplotlib
13
+ wget
14
+ pyarrow
15
+ pyworld
16
+ librosa
17
+ decord
18
+ modelscope
19
+ GitPython
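A plausible install order for these pinned lists, assuming the repository root as the working directory, is to layer the task-specific files on top of the base requirements:

```bash
pip install -r requirements.txt           # base dependencies
pip install -r requirements_animate.txt   # only needed for the Wan-Animate task
pip install -r requirements_s2v.txt       # only needed for the speech-to-video task
```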
Wan2.2/tests/README.md ADDED
@@ -0,0 +1,6 @@
1
+
2
+ Put all your models (Wan2.2-T2V-A14B, Wan2.2-I2V-A14B, Wan2.2-TI2V-5B) in one folder and specify the maximum number of GPUs you want to use.
3
+
4
+ ```bash
5
+ bash ./tests/test.sh <local model dir> <gpu number>
6
+ ```
Wan2.2/tests/test.sh ADDED
@@ -0,0 +1,91 @@
1
+ #!/bin/bash
2
+ set -x
3
+
4
+ unset NCCL_DEBUG
5
+
6
+ if [ "$#" -eq 2 ]; then
7
+ MODEL_DIR=$(realpath "$1")
8
+ GPUS=$2
9
+ else
10
+ echo "Usage: $0 <local model dir> <gpu number>"
11
+ exit 1
12
+ fi
13
+
14
+ SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )"
15
+ REPO_ROOT="$(dirname "$SCRIPT_DIR")"
16
+ cd "$REPO_ROOT" || exit 1
17
+
18
+ PY_FILE=./generate.py
19
+
20
+
21
+ function t2v_A14B() {
22
+ CKPT_DIR="$MODEL_DIR/Wan2.2-T2V-A14B"
23
+
24
+ # # 1-GPU Test
25
+ # echo -e "\n\n>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> t2v_A14B 1-GPU Test: "
26
+ # python $PY_FILE --task t2v-A14B --size 480*832 --ckpt_dir $CKPT_DIR
27
+
28
+ # Multiple GPU Test
29
+ echo -e "\n\n>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> t2v_A14B Multiple GPU Test: "
30
+ torchrun --nproc_per_node=$GPUS $PY_FILE --task t2v-A14B --ckpt_dir $CKPT_DIR --size 832*480 --dit_fsdp --t5_fsdp --ulysses_size $GPUS
31
+
32
+ echo -e "\n\n>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> t2v_A14B Multiple GPU Test: "
33
+ torchrun --nproc_per_node=$GPUS $PY_FILE --task t2v-A14B --ckpt_dir $CKPT_DIR --size 720*1280 --dit_fsdp --t5_fsdp --ulysses_size $GPUS
34
+
35
+ echo -e "\n\n>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> t2v_A14B Multiple GPU Test: "
36
+ torchrun --nproc_per_node=$GPUS $PY_FILE --task t2v-A14B --ckpt_dir $CKPT_DIR --size 1280*720 --dit_fsdp --t5_fsdp --ulysses_size $GPUS
37
+
38
+ echo -e "\n\n>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> t2v_A14B Multiple GPU, prompt extend local_qwen: "
39
+ torchrun --nproc_per_node=$GPUS $PY_FILE --task t2v-A14B --ckpt_dir $CKPT_DIR --size 480*832 --dit_fsdp --t5_fsdp --ulysses_size $GPUS --use_prompt_extend --prompt_extend_model "Qwen/Qwen2.5-3B-Instruct" --prompt_extend_target_lang "en"
40
+ }
41
+
42
+
43
+ function i2v_A14B() {
44
+ CKPT_DIR="$MODEL_DIR/Wan2.2-I2V-A14B"
45
+
46
+ # echo -e "\n\n>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> i2v_14B 1-GPU Test: "
47
+ # python $PY_FILE --task i2v-A14B --size 832*480 --ckpt_dir $CKPT_DIR
48
+
49
+ # Multiple GPU Test
50
+ echo -e "\n\n>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> i2v_14B Multiple GPU Test: "
51
+ torchrun --nproc_per_node=$GPUS $PY_FILE --task i2v-A14B --ckpt_dir $CKPT_DIR --size 832*480 --dit_fsdp --t5_fsdp --ulysses_size $GPUS
52
+
53
+ echo -e "\n\n>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> i2v_14B Multiple GPU, prompt extend local_qwen: "
54
+ torchrun --nproc_per_node=$GPUS $PY_FILE --task i2v-A14B --ckpt_dir $CKPT_DIR --size 720*1280 --dit_fsdp --t5_fsdp --ulysses_size $GPUS --use_prompt_extend --prompt_extend_model "Qwen/Qwen2.5-VL-3B-Instruct" --prompt_extend_target_lang "en"
55
+
56
+ echo -e "\n\n>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> i2v_14B Multiple GPU, prompt extend local_qwen: "
57
+ torchrun --nproc_per_node=$GPUS $PY_FILE --task i2v-A14B --ckpt_dir $CKPT_DIR --size 1280*720 --dit_fsdp --t5_fsdp --ulysses_size $GPUS --use_prompt_extend --prompt_extend_model "Qwen/Qwen2.5-VL-3B-Instruct" --prompt_extend_target_lang "en"
58
+
59
+ if [ -n "${DASH_API_KEY+x}" ]; then
60
+ echo -e "\n\n>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> i2v_14B Multiple GPU, prompt extend dashscope: "
61
+ torchrun --nproc_per_node=$GPUS $PY_FILE --task i2v-A14B --ckpt_dir $CKPT_DIR --size 480*832 --dit_fsdp --t5_fsdp --ulysses_size $GPUS --use_prompt_extend --prompt_extend_method "dashscope"
62
+ else
63
+ echo -e "\n\n>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> No DASH_API_KEY found, skip the dashscope extend test."
64
+ fi
65
+ }
66
+
67
+ function ti2v_5B() {
68
+ CKPT_DIR="$MODEL_DIR/Wan2.2-TI2V-5B"
69
+
70
+ # # 1-GPU Test
71
+ # echo -e "\n\n>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> ti2v_5B t2v 1-GPU Test: "
72
+ # python $PY_FILE --task ti2v-5B --size 1280*704 --ckpt_dir $CKPT_DIR
73
+
74
+ # Multiple GPU Test
75
+ echo -e "\n\n>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> ti2v_5B t2v Multiple GPU Test: "
76
+ torchrun --nproc_per_node=$GPUS $PY_FILE --task ti2v-5B --ckpt_dir $CKPT_DIR --size 1280*704 --dit_fsdp --t5_fsdp --ulysses_size $GPUS
77
+
78
+ echo -e "\n\n>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> ti2v_5B t2v Multiple GPU, prompt extend local_qwen: "
79
+ torchrun --nproc_per_node=$GPUS $PY_FILE --task ti2v-5B --ckpt_dir $CKPT_DIR --size 704*1280 --dit_fsdp --t5_fsdp --ulysses_size $GPUS --use_prompt_extend --prompt_extend_model "Qwen/Qwen2.5-3B-Instruct" --prompt_extend_target_lang "en"
80
+
81
+ echo -e "\n\n>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> ti2v_5B i2v Multiple GPU Test: "
82
+ torchrun --nproc_per_node=$GPUS $PY_FILE --task ti2v-5B --ckpt_dir $CKPT_DIR --size 704*1280 --dit_fsdp --t5_fsdp --ulysses_size $GPUS --prompt "Summer beach vacation style, a white cat wearing sunglasses sits on a surfboard. The fluffy-furred feline gazes directly at the camera with a relaxed expression. Blurred beach scenery forms the background featuring crystal-clear waters, distant green hills, and a blue sky dotted with white clouds. The cat assumes a naturally relaxed posture, as if savoring the sea breeze and warm sunlight. A close-up shot highlights the feline's intricate details and the refreshing atmosphere of the seaside." --image "examples/i2v_input.JPG"
83
+
84
+ echo -e "\n\n>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> ti2v_5B i2v Multiple GPU, prompt extend local_qwen: "
85
+ torchrun --nproc_per_node=$GPUS $PY_FILE --task ti2v-5B --ckpt_dir $CKPT_DIR --size 1280*704 --dit_fsdp --t5_fsdp --ulysses_size $GPUS --use_prompt_extend --prompt_extend_model "Qwen/Qwen2.5-3B-Instruct" --prompt_extend_target_lang 'en' --prompt "Summer beach vacation style, a white cat wearing sunglasses sits on a surfboard. The fluffy-furred feline gazes directly at the camera with a relaxed expression. Blurred beach scenery forms the background featuring crystal-clear waters, distant green hills, and a blue sky dotted with white clouds. The cat assumes a naturally relaxed posture, as if savoring the sea breeze and warm sunlight. A close-up shot highlights the feline's intricate details and the refreshing atmosphere of the seaside." --image "examples/i2v_input.JPG"
86
+
87
+ }
88
+
89
+ t2v_A14B
90
+ i2v_A14B
91
+ ti2v_5B
Wan2.2/wan/__init__.py ADDED
@@ -0,0 +1,7 @@
1
+ # Copyright 2024-2025 The Alibaba Wan Team Authors. All rights reserved.
2
+ from . import configs, distributed, modules
3
+ from .image2video import WanI2V
4
+ from .speech2video import WanS2V
5
+ from .text2video import WanT2V
6
+ from .textimage2video import WanTI2V
7
+ from .animate import WanAnimate
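A quick smoke test that the package and the pipelines re-exported above import cleanly (this assumes the package has been installed as sketched earlier):

```bash
python -c "import wan; print(wan.WanT2V, wan.WanI2V, wan.WanTI2V, wan.WanS2V, wan.WanAnimate)"
```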
Wan2.2/wan/animate.py ADDED
@@ -0,0 +1,648 @@
1
+ # Copyright 2024-2025 The Alibaba Wan Team Authors. All rights reserved.
2
+ import logging
3
+ import math
4
+ import os
5
+ import cv2
6
+ import types
7
+ from copy import deepcopy
8
+ from functools import partial
9
+ from einops import rearrange
10
+ import numpy as np
11
+ import torch
12
+
13
+ import torch.distributed as dist
14
+ from peft import set_peft_model_state_dict
15
+ from decord import VideoReader
16
+ from tqdm import tqdm
17
+ import torch.nn.functional as F
18
+ from .distributed.fsdp import shard_model
19
+ from .distributed.sequence_parallel import sp_attn_forward, sp_dit_forward
20
+ from .distributed.util import get_world_size
21
+
22
+ from .modules.animate import WanAnimateModel
23
+ from .modules.animate import CLIPModel
24
+ from .modules.t5 import T5EncoderModel
25
+ from .modules.vae2_1 import Wan2_1_VAE
26
+ from .modules.animate.animate_utils import TensorList, get_loraconfig
27
+ from .utils.fm_solvers import (
28
+ FlowDPMSolverMultistepScheduler,
29
+ get_sampling_sigmas,
30
+ retrieve_timesteps,
31
+ )
32
+ from .utils.fm_solvers_unipc import FlowUniPCMultistepScheduler
33
+
34
+
35
+
36
+ class WanAnimate:
37
+
38
+ def __init__(
39
+ self,
40
+ config,
41
+ checkpoint_dir,
42
+ device_id=0,
43
+ rank=0,
44
+ t5_fsdp=False,
45
+ dit_fsdp=False,
46
+ use_sp=False,
47
+ t5_cpu=False,
48
+ init_on_cpu=True,
49
+ convert_model_dtype=False,
50
+ use_relighting_lora=False
51
+ ):
52
+ r"""
53
+ Initializes the generation model components.
54
+
55
+ Args:
56
+ config (EasyDict):
57
+ Object containing model parameters initialized from config.py
58
+ checkpoint_dir (`str`):
59
+ Path to directory containing model checkpoints
60
+ device_id (`int`, *optional*, defaults to 0):
61
+ Id of target GPU device
62
+ rank (`int`, *optional*, defaults to 0):
63
+ Process rank for distributed training
64
+ t5_fsdp (`bool`, *optional*, defaults to False):
65
+ Enable FSDP sharding for T5 model
66
+ dit_fsdp (`bool`, *optional*, defaults to False):
67
+ Enable FSDP sharding for DiT model
68
+ use_sp (`bool`, *optional*, defaults to False):
69
+ Enable distribution strategy of sequence parallel.
70
+ t5_cpu (`bool`, *optional*, defaults to False):
71
+ Whether to place T5 model on CPU. Only works without t5_fsdp.
72
+ init_on_cpu (`bool`, *optional*, defaults to True):
73
+ Enable initializing Transformer Model on CPU. Only works without FSDP or USP.
74
+ convert_model_dtype (`bool`, *optional*, defaults to False):
75
+ Convert DiT model parameters dtype to 'config.param_dtype'.
76
+ Only works without FSDP.
77
+ use_relighting_lora (`bool`, *optional*, defaults to False):
78
+ Whether to use relighting lora for character replacement.
79
+ """
80
+ self.device = torch.device(f"cuda:{device_id}")
81
+ self.config = config
82
+ self.rank = rank
83
+ self.t5_cpu = t5_cpu
84
+ self.init_on_cpu = init_on_cpu
85
+
86
+ self.num_train_timesteps = config.num_train_timesteps
87
+ self.param_dtype = config.param_dtype
88
+
89
+ if t5_fsdp or dit_fsdp or use_sp:
90
+ self.init_on_cpu = False
91
+
92
+ shard_fn = partial(shard_model, device_id=device_id)
93
+ self.text_encoder = T5EncoderModel(
94
+ text_len=config.text_len,
95
+ dtype=config.t5_dtype,
96
+ device=torch.device('cpu'),
97
+ checkpoint_path=os.path.join(checkpoint_dir, config.t5_checkpoint),
98
+ tokenizer_path=os.path.join(checkpoint_dir, config.t5_tokenizer),
99
+ shard_fn=shard_fn if t5_fsdp else None,
100
+ )
101
+
102
+ self.clip = CLIPModel(
103
+ dtype=torch.float16,
104
+ device=self.device,
105
+ checkpoint_path=os.path.join(checkpoint_dir,
106
+ config.clip_checkpoint),
107
+ tokenizer_path=os.path.join(checkpoint_dir, config.clip_tokenizer))
108
+
109
+ self.vae = Wan2_1_VAE(
110
+ vae_pth=os.path.join(checkpoint_dir, config.vae_checkpoint),
111
+ device=self.device)
112
+
113
+ logging.info(f"Creating WanAnimate from {checkpoint_dir}")
114
+
115
+ if not dit_fsdp:
116
+ self.noise_model = WanAnimateModel.from_pretrained(
117
+ checkpoint_dir,
118
+ torch_dtype=self.param_dtype,
119
+ device_map=self.device)
120
+ else:
121
+ self.noise_model = WanAnimateModel.from_pretrained(
122
+ checkpoint_dir, torch_dtype=self.param_dtype)
123
+
124
+ self.noise_model = self._configure_model(
125
+ model=self.noise_model,
126
+ use_sp=use_sp,
127
+ dit_fsdp=dit_fsdp,
128
+ shard_fn=shard_fn,
129
+ convert_model_dtype=convert_model_dtype,
130
+ use_lora=use_relighting_lora,
131
+ checkpoint_dir=checkpoint_dir,
132
+ config=config
133
+ )
134
+
135
+ if use_sp:
136
+ self.sp_size = get_world_size()
137
+ else:
138
+ self.sp_size = 1
139
+
140
+ self.sample_neg_prompt = config.sample_neg_prompt
141
+ self.sample_prompt = config.prompt
142
+
143
+
144
+ def _configure_model(self, model, use_sp, dit_fsdp, shard_fn,
145
+ convert_model_dtype, use_lora, checkpoint_dir, config):
146
+ """
147
+ Configures a model object. This includes setting evaluation modes,
148
+ applying distributed parallel strategy, and handling device placement.
149
+
150
+ Args:
151
+ model (torch.nn.Module):
152
+ The model instance to configure.
153
+ use_sp (`bool`):
154
+ Enable distribution strategy of sequence parallel.
155
+ dit_fsdp (`bool`):
156
+ Enable FSDP sharding for DiT model.
157
+ shard_fn (callable):
158
+ The function to apply FSDP sharding.
159
+ convert_model_dtype (`bool`):
160
+ Convert DiT model parameters dtype to 'config.param_dtype'.
161
+ Only works without FSDP.
162
+
163
+ Returns:
164
+ torch.nn.Module:
165
+ The configured model.
166
+ """
167
+ model.eval().requires_grad_(False)
168
+
169
+ if use_sp:
170
+ for block in model.blocks:
171
+ block.self_attn.forward = types.MethodType(
172
+ sp_attn_forward, block.self_attn)
173
+
174
+ model.use_context_parallel = True
175
+
176
+ if dist.is_initialized():
177
+ dist.barrier()
178
+
179
+ if use_lora:
180
+ logging.info("Loading Relighting Lora. ")
181
+ lora_config = get_loraconfig(
182
+ transformer=model,
183
+ rank=128,
184
+ alpha=128
185
+ )
186
+ model.add_adapter(lora_config)
187
+ lora_path = os.path.join(checkpoint_dir, config.lora_checkpoint)
188
+ peft_state_dict = torch.load(lora_path)["state_dict"]
189
+ set_peft_model_state_dict(model, peft_state_dict)
190
+
191
+ if dit_fsdp:
192
+ model = shard_fn(model, use_lora=use_lora)
193
+ else:
194
+ if convert_model_dtype:
195
+ model.to(self.param_dtype)
196
+ if not self.init_on_cpu:
197
+ model.to(self.device)
198
+
199
+ return model
200
+
201
+ def inputs_padding(self, array, target_len):
202
+ idx = 0
203
+ flip = False
204
+ target_array = []
205
+ while len(target_array) < target_len:
206
+ target_array.append(deepcopy(array[idx]))
207
+ if flip:
208
+ idx -= 1
209
+ else:
210
+ idx += 1
211
+ if idx == 0 or idx == len(array) - 1:
212
+ flip = not flip
213
+ return target_array[:target_len]
214
+
215
+ def get_valid_len(self, real_len, clip_len=81, overlap=1):
216
+ real_clip_len = clip_len - overlap
217
+ last_clip_num = (real_len - overlap) % real_clip_len
218
+ if last_clip_num == 0:
219
+ extra = 0
220
+ else:
221
+ extra = real_clip_len - last_clip_num
222
+ target_len = real_len + extra
223
+ return target_len
224
+
225
+
226
+ def get_i2v_mask(self, lat_t, lat_h, lat_w, mask_len=1, mask_pixel_values=None, device="cuda"):
227
+ if mask_pixel_values is None:
228
+ msk = torch.zeros(1, (lat_t-1) * 4 + 1, lat_h, lat_w, device=device)
229
+ else:
230
+ msk = mask_pixel_values.clone()
231
+ msk[:, :mask_len] = 1
232
+ msk = torch.concat([torch.repeat_interleave(msk[:, 0:1], repeats=4, dim=1), msk[:, 1:]], dim=1)
233
+ msk = msk.view(1, msk.shape[1] // 4, 4, lat_h, lat_w)
234
+ msk = msk.transpose(1, 2)[0]
235
+ return msk
236
+
237
+ def padding_resize(self, img_ori, height=512, width=512, padding_color=(0, 0, 0), interpolation=cv2.INTER_LINEAR):
238
+ ori_height = img_ori.shape[0]
239
+ ori_width = img_ori.shape[1]
240
+ channel = img_ori.shape[2]
241
+
242
+ img_pad = np.zeros((height, width, channel))
243
+ if channel == 1:
244
+ img_pad[:, :, 0] = padding_color[0]
245
+ else:
246
+ img_pad[:, :, 0] = padding_color[0]
247
+ img_pad[:, :, 1] = padding_color[1]
248
+ img_pad[:, :, 2] = padding_color[2]
249
+
250
+ if (ori_height / ori_width) > (height / width):
251
+ new_width = int(height / ori_height * ori_width)
252
+ img = cv2.resize(img_ori, (new_width, height), interpolation=interpolation)
253
+ padding = int((width - new_width) / 2)
254
+ if len(img.shape) == 2:
255
+ img = img[:, :, np.newaxis]
256
+ img_pad[:, padding: padding + new_width, :] = img
257
+ else:
258
+ new_height = int(width / ori_width * ori_height)
259
+ img = cv2.resize(img_ori, (width, new_height), interpolation=interpolation)
260
+ padding = int((height - new_height) / 2)
261
+ if len(img.shape) == 2:
262
+ img = img[:, :, np.newaxis]
263
+ img_pad[padding: padding + new_height, :, :] = img
264
+
265
+ img_pad = np.uint8(img_pad)
266
+
267
+ return img_pad
268
+
269
+ def prepare_source(self, src_pose_path, src_face_path, src_ref_path):
270
+ pose_video_reader = VideoReader(src_pose_path)
271
+ pose_len = len(pose_video_reader)
272
+ pose_idxs = list(range(pose_len))
273
+ cond_images = pose_video_reader.get_batch(pose_idxs).asnumpy()
274
+
275
+ face_video_reader = VideoReader(src_face_path)
276
+ face_len = len(face_video_reader)
277
+ face_idxs = list(range(face_len))
278
+ face_images = face_video_reader.get_batch(face_idxs).asnumpy()
279
+ height, width = cond_images[0].shape[:2]
280
+ refer_images = cv2.imread(src_ref_path)[..., ::-1]
281
+ refer_images = self.padding_resize(refer_images, height=height, width=width)
282
+ return cond_images, face_images, refer_images
283
+
284
+ def prepare_source_for_replace(self, src_bg_path, src_mask_path):
285
+ bg_video_reader = VideoReader(src_bg_path)
286
+ bg_len = len(bg_video_reader)
287
+ bg_idxs = list(range(bg_len))
288
+ bg_images = bg_video_reader.get_batch(bg_idxs).asnumpy()
289
+
290
+ mask_video_reader = VideoReader(src_mask_path)
291
+ mask_len = len(mask_video_reader)
292
+ mask_idxs = list(range(mask_len))
293
+ mask_images = mask_video_reader.get_batch(mask_idxs).asnumpy()
294
+ mask_images = mask_images[:, :, :, 0] / 255
295
+ return bg_images, mask_images
296
+
297
+ def generate(
298
+ self,
299
+ src_root_path,
300
+ replace_flag=False,
301
+ clip_len=77,
302
+ refert_num=1,
303
+ shift=5.0,
304
+ sample_solver='dpm++',
305
+ sampling_steps=20,
306
+ guide_scale=1,
307
+ input_prompt="",
308
+ n_prompt="",
309
+ seed=-1,
310
+ offload_model=True,
311
+ ):
312
+ r"""
313
+ Generates video frames from input image using diffusion process.
314
+
315
+ Args:
316
+ src_root_path ('str'):
317
+ Path to the directory containing the preprocessed source files
318
+ replace_flag (`bool`, *optional*, defaults to False):
319
+ Whether to use character replace.
320
+ clip_len (`int`, *optional*, defaults to 77):
321
+ How many frames to generate per clip. The number should be 4n+1
322
+ refert_num (`int`, *optional*, defaults to 1):
323
+ How many frames are used for temporal guidance. Must be 1 or 5.
324
+ shift (`float`, *optional*, defaults to 5.0):
325
+ Noise schedule shift parameter.
326
+ sample_solver (`str`, *optional*, defaults to 'dpm++'):
327
+ Solver used to sample the video.
328
+ sampling_steps (`int`, *optional*, defaults to 20):
329
+ Number of diffusion sampling steps. Higher values improve quality but slow generation
330
+ guide_scale (`float` or tuple[`float`], *optional*, defaults to 1.0):
331
+ Classifier-free guidance scale. We only use it for expression control.
332
+ In most cases, it's not necessary and faster generation can be achieved without it.
333
+ When expression adjustments are needed, you may consider using this feature.
334
+ input_prompt (`str`):
335
+ Text prompt for content generation. We don't recommend custom prompts (although they work)
336
+ n_prompt (`str`, *optional*, defaults to ""):
337
+ Negative prompt for content exclusion. If not given, use `config.sample_neg_prompt`
338
+ seed (`int`, *optional*, defaults to -1):
339
+ Random seed for noise generation. If -1, use random seed
340
+ offload_model (`bool`, *optional*, defaults to True):
341
+ If True, offloads models to CPU during generation to save VRAM
342
+
343
+ Returns:
344
+ torch.Tensor:
345
+ Generated video frames tensor. Dimensions: (C, N, H, W) where:
346
+ - C: Color channels (3 for RGB)
347
+ - N: Number of frames
348
+ - H: Frame height
349
+ - W: Frame width
350
+ """
351
+ assert refert_num == 1 or refert_num == 5, "refert_num should be 1 or 5."
352
+
353
+ seed_g = torch.Generator(device=self.device)
354
+ seed_g.manual_seed(seed)
355
+
356
+ if n_prompt == "":
357
+ n_prompt = self.sample_neg_prompt
358
+
359
+ if input_prompt == "":
360
+ input_prompt = self.sample_prompt
361
+
362
+ src_pose_path = os.path.join(src_root_path, "src_pose.mp4")
363
+ src_face_path = os.path.join(src_root_path, "src_face.mp4")
364
+ src_ref_path = os.path.join(src_root_path, "src_ref.png")
365
+
366
+ cond_images, face_images, refer_images = self.prepare_source(src_pose_path=src_pose_path, src_face_path=src_face_path, src_ref_path=src_ref_path)
367
+
368
+ if not self.t5_cpu:
369
+ self.text_encoder.model.to(self.device)
370
+ context = self.text_encoder([input_prompt], self.device)
371
+ context_null = self.text_encoder([n_prompt], self.device)
372
+ if offload_model:
373
+ self.text_encoder.model.cpu()
374
+ else:
375
+ context = self.text_encoder([input_prompt], torch.device('cpu'))
376
+ context_null = self.text_encoder([n_prompt], torch.device('cpu'))
377
+ context = [t.to(self.device) for t in context]
378
+ context_null = [t.to(self.device) for t in context_null]
379
+
380
+ real_frame_len = len(cond_images)
381
+ target_len = self.get_valid_len(real_frame_len, clip_len, overlap=refert_num)
382
+ logging.info('real frames: {} target frames: {}'.format(real_frame_len, target_len))
383
+ cond_images = self.inputs_padding(cond_images, target_len)
384
+ face_images = self.inputs_padding(face_images, target_len)
385
+
386
+ if replace_flag:
387
+ src_bg_path = os.path.join(src_root_path, "src_bg.mp4")
388
+ src_mask_path = os.path.join(src_root_path, "src_mask.mp4")
389
+ bg_images, mask_images = self.prepare_source_for_replace(src_bg_path, src_mask_path)
390
+ bg_images = self.inputs_padding(bg_images, target_len)
391
+ mask_images = self.inputs_padding(mask_images, target_len)
392
+
393
+ height, width = refer_images.shape[:2]
394
+ start = 0
395
+ end = clip_len
396
+ all_out_frames = []
397
+ while True:
398
+ if start + refert_num >= len(cond_images):
399
+ break
400
+
401
+ if start == 0:
402
+ mask_reft_len = 0
403
+ else:
404
+ mask_reft_len = refert_num
405
+
406
+ batch = {
407
+ "conditioning_pixel_values": torch.zeros(1, 3, clip_len, height, width),
408
+ "bg_pixel_values": torch.zeros(1, 3, clip_len, height, width),
409
+ "mask_pixel_values": torch.zeros(1, 1, clip_len, height, width),
410
+ "face_pixel_values": torch.zeros(1, 3, clip_len, 512, 512),
411
+ "refer_pixel_values": torch.zeros(1, 3, height, width),
412
+ "refer_t_pixel_values": torch.zeros(refert_num, 3, height, width)
413
+ }
414
+
415
+ batch["conditioning_pixel_values"] = rearrange(
416
+ torch.tensor(np.stack(cond_images[start:end]) / 127.5 - 1),
417
+ "t h w c -> 1 c t h w",
418
+ )
419
+ batch["face_pixel_values"] = rearrange(
420
+ torch.tensor(np.stack(face_images[start:end]) / 127.5 - 1),
421
+ "t h w c -> 1 c t h w",
422
+ )
423
+
424
+ batch["refer_pixel_values"] = rearrange(
425
+ torch.tensor(refer_images / 127.5 - 1), "h w c -> 1 c h w"
426
+ )
427
+
428
+ if start > 0:
429
+ batch["refer_t_pixel_values"] = rearrange(
430
+ out_frames[0, :, -refert_num:].clone().detach(),
431
+ "c t h w -> t c h w",
432
+ )
433
+
434
+ batch["refer_t_pixel_values"] = rearrange(batch["refer_t_pixel_values"],
435
+ "t c h w -> 1 c t h w",
436
+ )
437
+
438
+ if replace_flag:
439
+ batch["bg_pixel_values"] = rearrange(
440
+ torch.tensor(np.stack(bg_images[start:end]) / 127.5 - 1),
441
+ "t h w c -> 1 c t h w",
442
+ )
443
+
444
+ batch["mask_pixel_values"] = rearrange(
445
+ torch.tensor(np.stack(mask_images[start:end])[:, :, :, None]),
446
+ "t h w c -> 1 t c h w",
447
+ )
448
+
449
+
450
+ for key, value in batch.items():
451
+ if isinstance(value, torch.Tensor):
452
+ batch[key] = value.to(device=self.device, dtype=torch.bfloat16)
453
+
454
+ ref_pixel_values = batch["refer_pixel_values"]
455
+ refer_t_pixel_values = batch["refer_t_pixel_values"]
456
+ conditioning_pixel_values = batch["conditioning_pixel_values"]
457
+ face_pixel_values = batch["face_pixel_values"]
458
+
459
+ B, _, H, W = ref_pixel_values.shape
460
+ T = clip_len
461
+ lat_h = H // 8
462
+ lat_w = W // 8
463
+ lat_t = T // 4 + 1
464
+ target_shape = [lat_t + 1, lat_h, lat_w]
465
+ noise = [
466
+ torch.randn(
467
+ 16,
468
+ target_shape[0],
469
+ target_shape[1],
470
+ target_shape[2],
471
+ dtype=torch.float32,
472
+ device=self.device,
473
+ generator=seed_g,
474
+ )
475
+ ]
476
+
477
+ max_seq_len = int(math.ceil(np.prod(target_shape) // 4 / self.sp_size)) * self.sp_size
478
+ if max_seq_len % self.sp_size != 0:
479
+ raise ValueError(f"max_seq_len {max_seq_len} is not divisible by sp_size {self.sp_size}")
480
+
481
+ with (
482
+ torch.autocast(device_type=str(self.device), dtype=torch.bfloat16, enabled=True),
483
+ torch.no_grad()
484
+ ):
485
+ if sample_solver == 'unipc':
486
+ sample_scheduler = FlowUniPCMultistepScheduler(
487
+ num_train_timesteps=self.num_train_timesteps,
488
+ shift=1,
489
+ use_dynamic_shifting=False)
490
+ sample_scheduler.set_timesteps(
491
+ sampling_steps, device=self.device, shift=shift)
492
+ timesteps = sample_scheduler.timesteps
493
+ elif sample_solver == 'dpm++':
494
+ sample_scheduler = FlowDPMSolverMultistepScheduler(
495
+ num_train_timesteps=self.num_train_timesteps,
496
+ shift=1,
497
+ use_dynamic_shifting=False)
498
+ sampling_sigmas = get_sampling_sigmas(sampling_steps, shift)
499
+ timesteps, _ = retrieve_timesteps(
500
+ sample_scheduler,
501
+ device=self.device,
502
+ sigmas=sampling_sigmas)
503
+ else:
504
+ raise NotImplementedError("Unsupported solver.")
505
+
506
+ latents = noise
507
+
508
+ pose_latents_no_ref = self.vae.encode(conditioning_pixel_values.to(torch.bfloat16))
509
+ pose_latents_no_ref = torch.stack(pose_latents_no_ref)
510
+ pose_latents = torch.cat([pose_latents_no_ref], dim=2)
511
+
512
+ ref_pixel_values = rearrange(ref_pixel_values, "t c h w -> 1 c t h w")
513
+ ref_latents = self.vae.encode(ref_pixel_values.to(torch.bfloat16))
514
+ ref_latents = torch.stack(ref_latents)
515
+
516
+ mask_ref = self.get_i2v_mask(1, lat_h, lat_w, 1, device=self.device)
517
+ y_ref = torch.concat([mask_ref, ref_latents[0]]).to(dtype=torch.bfloat16, device=self.device)
518
+
519
+ img = ref_pixel_values[0, :, 0]
520
+ clip_context = self.clip.visual([img[:, None, :, :]]).to(dtype=torch.bfloat16, device=self.device)
521
+
522
+ if mask_reft_len > 0:
523
+ if replace_flag:
524
+ bg_pixel_values = batch["bg_pixel_values"]
525
+ y_reft = self.vae.encode(
526
+ [
527
+ torch.concat([refer_t_pixel_values[0, :, :mask_reft_len], bg_pixel_values[0, :, mask_reft_len:]], dim=1).to(self.device)
528
+ ]
529
+ )[0]
530
+ mask_pixel_values = 1 - batch["mask_pixel_values"]
531
+ mask_pixel_values = rearrange(mask_pixel_values, "b t c h w -> (b t) c h w")
532
+ mask_pixel_values = F.interpolate(mask_pixel_values, size=(H//8, W//8), mode='nearest')
533
+ mask_pixel_values = rearrange(mask_pixel_values, "(b t) c h w -> b t c h w", b=1)[:,:,0]
534
+ msk_reft = self.get_i2v_mask(lat_t, lat_h, lat_w, mask_reft_len, mask_pixel_values=mask_pixel_values, device=self.device)
535
+ else:
536
+ y_reft = self.vae.encode(
537
+ [
538
+ torch.concat(
539
+ [
540
+ torch.nn.functional.interpolate(refer_t_pixel_values[0, :, :mask_reft_len].cpu(),
541
+ size=(H, W), mode="bicubic"),
542
+ torch.zeros(3, T - mask_reft_len, H, W),
543
+ ],
544
+ dim=1,
545
+ ).to(self.device)
546
+ ]
547
+ )[0]
548
+ msk_reft = self.get_i2v_mask(lat_t, lat_h, lat_w, mask_reft_len, device=self.device)
549
+ else:
550
+ if replace_flag:
551
+ bg_pixel_values = batch["bg_pixel_values"]
552
+ mask_pixel_values = 1 - batch["mask_pixel_values"]
553
+ mask_pixel_values = rearrange(mask_pixel_values, "b t c h w -> (b t) c h w")
554
+ mask_pixel_values = F.interpolate(mask_pixel_values, size=(H//8, W//8), mode='nearest')
555
+ mask_pixel_values = rearrange(mask_pixel_values, "(b t) c h w -> b t c h w", b=1)[:,:,0]
556
+ y_reft = self.vae.encode(
557
+ [
558
+ torch.concat(
559
+ [
560
+ bg_pixel_values[0],
561
+ ],
562
+ dim=1,
563
+ ).to(self.device)
564
+ ]
565
+ )[0]
566
+ msk_reft = self.get_i2v_mask(lat_t, lat_h, lat_w, mask_reft_len, mask_pixel_values=mask_pixel_values, device=self.device)
567
+ else:
568
+ y_reft = self.vae.encode(
569
+ [
570
+ torch.concat(
571
+ [
572
+ torch.zeros(3, T - mask_reft_len, H, W),
573
+ ],
574
+ dim=1,
575
+ ).to(self.device)
576
+ ]
577
+ )[0]
578
+ msk_reft = self.get_i2v_mask(lat_t, lat_h, lat_w, mask_reft_len, device=self.device)
579
+
580
+ y_reft = torch.concat([msk_reft, y_reft]).to(dtype=torch.bfloat16, device=self.device)
581
+ y = torch.concat([y_ref, y_reft], dim=1)
582
+
583
+ arg_c = {
584
+ "context": context,
585
+ "seq_len": max_seq_len,
586
+ "clip_fea": clip_context.to(dtype=torch.bfloat16, device=self.device),
587
+ "y": [y],
588
+ "pose_latents": pose_latents,
589
+ "face_pixel_values": face_pixel_values,
590
+ }
591
+
592
+ if guide_scale > 1:
593
+ face_pixel_values_uncond = face_pixel_values * 0 - 1
594
+ arg_null = {
595
+ "context": context_null,
596
+ "seq_len": max_seq_len,
597
+ "clip_fea": clip_context.to(dtype=torch.bfloat16, device=self.device),
598
+ "y": [y],
599
+ "pose_latents": pose_latents,
600
+ "face_pixel_values": face_pixel_values_uncond,
601
+ }
602
+
603
+ for i, t in enumerate(tqdm(timesteps)):
604
+ latent_model_input = latents
605
+ timestep = [t]
606
+
607
+ timestep = torch.stack(timestep)
608
+
609
+ noise_pred_cond = TensorList(
610
+ self.noise_model(TensorList(latent_model_input), t=timestep, **arg_c)
611
+ )
612
+
613
+ if guide_scale > 1:
614
+ noise_pred_uncond = TensorList(
615
+ self.noise_model(
616
+ TensorList(latent_model_input), t=timestep, **arg_null
617
+ )
618
+ )
619
+ noise_pred = noise_pred_uncond + guide_scale * (
620
+ noise_pred_cond - noise_pred_uncond
621
+ )
622
+ else:
623
+ noise_pred = noise_pred_cond
624
+
625
+ temp_x0 = sample_scheduler.step(
626
+ noise_pred[0].unsqueeze(0),
627
+ t,
628
+ latents[0].unsqueeze(0),
629
+ return_dict=False,
630
+ generator=seed_g,
631
+ )[0]
632
+ latents[0] = temp_x0.squeeze(0)
633
+
634
+ x0 = latents
635
+
636
+ x0 = [x.to(dtype=torch.float32) for x in x0]
637
+ out_frames = torch.stack(self.vae.decode([x0[0][:, 1:]]))
638
+
639
+ if start != 0:
640
+ out_frames = out_frames[:, :, refert_num:]
641
+
642
+ all_out_frames.append(out_frames.cpu())
643
+
644
+ start += clip_len - refert_num
645
+ end += clip_len - refert_num
646
+
647
+ videos = torch.cat(all_out_frames, dim=2)[:, :, :real_frame_len]
648
+ return videos[0] if self.rank == 0 else None
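Since WanAnimate.generate expects a preprocessed directory (src_pose.mp4, src_face.mp4 and src_ref.png, plus src_bg.mp4 and src_mask.mp4 when replacement is enabled), a hedged example of driving it through generate.py could look like this; both paths are placeholders.

```bash
# ./Wan2.2-Animate-14B and ./examples/animate_input are hypothetical local paths.
python generate.py \
  --task animate-14B \
  --ckpt_dir ./Wan2.2-Animate-14B \
  --src_root_path ./examples/animate_input \
  --refert_num 1
```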
Wan2.2/wan/configs/__init__.py ADDED
@@ -0,0 +1,50 @@
1
+ # Copyright 2024-2025 The Alibaba Wan Team Authors. All rights reserved.
2
+ import copy
3
+ import os
4
+
5
+ os.environ['TOKENIZERS_PARALLELISM'] = 'false'
6
+
7
+ from .wan_i2v_A14B import i2v_A14B
8
+ from .wan_s2v_14B import s2v_14B
9
+ from .wan_t2v_A14B import t2v_A14B
10
+ from .wan_ti2v_5B import ti2v_5B
11
+ from .wan_animate_14B import animate_14B
12
+
13
+ WAN_CONFIGS = {
14
+ 't2v-A14B': t2v_A14B,
15
+ 'i2v-A14B': i2v_A14B,
16
+ 'ti2v-5B': ti2v_5B,
17
+ 'animate-14B': animate_14B,
18
+ 's2v-14B': s2v_14B,
19
+ }
20
+
21
+ SIZE_CONFIGS = {
22
+ '720*1280': (720, 1280),
23
+ '1280*720': (1280, 720),
24
+ '480*832': (480, 832),
25
+ '832*480': (832, 480),
26
+ '704*1280': (704, 1280),
27
+ '1280*704': (1280, 704),
28
+ '1024*704': (1024, 704),
29
+ '704*1024': (704, 1024),
30
+ }
31
+
32
+ MAX_AREA_CONFIGS = {
33
+ '720*1280': 720 * 1280,
34
+ '1280*720': 1280 * 720,
35
+ '480*832': 480 * 832,
36
+ '832*480': 832 * 480,
37
+ '704*1280': 704 * 1280,
38
+ '1280*704': 1280 * 704,
39
+ '1024*704': 1024 * 704,
40
+ '704*1024': 704 * 1024,
41
+ }
42
+
43
+ SUPPORTED_SIZES = {
44
+ 't2v-A14B': ('720*1280', '1280*720', '480*832', '832*480'),
45
+ 'i2v-A14B': ('720*1280', '1280*720', '480*832', '832*480'),
46
+ 'ti2v-5B': ('704*1280', '1280*704'),
47
+ 's2v-14B': ('720*1280', '1280*720', '480*832', '832*480', '1024*704',
48
+ '704*1024', '704*1280', '1280*704'),
49
+ 'animate-14B': ('720*1280', '1280*720')
50
+ }
Wan2.2/wan/configs/shared_config.py ADDED
@@ -0,0 +1,20 @@
1
+ # Copyright 2024-2025 The Alibaba Wan Team Authors. All rights reserved.
2
+ import torch
3
+ from easydict import EasyDict
4
+
5
+ #------------------------ Wan shared config ------------------------#
6
+ wan_shared_cfg = EasyDict()
7
+
8
+ # t5
9
+ wan_shared_cfg.t5_model = 'umt5_xxl'
10
+ wan_shared_cfg.t5_dtype = torch.bfloat16
11
+ wan_shared_cfg.text_len = 512
12
+
13
+ # transformer
14
+ wan_shared_cfg.param_dtype = torch.bfloat16
15
+
16
+ # inference
17
+ wan_shared_cfg.num_train_timesteps = 1000
18
+ wan_shared_cfg.sample_fps = 16
19
+ wan_shared_cfg.sample_neg_prompt = '色调艳丽,过曝,静态,细节模糊不清,字幕,风格,作品,画作,画面,静止,整体发灰,最差质量,低质量,JPEG压缩残留,丑陋的,残缺的,多余的手指,画得不好的手部,画得不好的脸部,畸形的,毁容的,形态畸形的肢体,手指融合,静止不动的画面,杂乱的背景,三条腿,背景人很多,倒着走'
20
+ wan_shared_cfg.frame_num = 81
Wan2.2/wan/configs/wan_animate_14B.py ADDED
@@ -0,0 +1,40 @@
1
+ # Copyright 2024-2025 The Alibaba Wan Team Authors. All rights reserved.
2
+ from easydict import EasyDict
3
+
4
+ from .shared_config import wan_shared_cfg
5
+
6
+ #------------------------ Wan animate 14B ------------------------#
7
+ animate_14B = EasyDict(__name__='Config: Wan animate 14B')
8
+ animate_14B.update(wan_shared_cfg)
9
+
10
+ animate_14B.t5_checkpoint = 'models_t5_umt5-xxl-enc-bf16.pth'
11
+ animate_14B.t5_tokenizer = 'google/umt5-xxl'
12
+
13
+ animate_14B.clip_checkpoint = 'models_clip_open-clip-xlm-roberta-large-vit-huge-14.pth'
14
+ animate_14B.clip_tokenizer = 'xlm-roberta-large'
15
+ animate_14B.lora_checkpoint = 'relighting_lora.ckpt'
16
+ # vae
17
+ animate_14B.vae_checkpoint = 'Wan2.1_VAE.pth'
18
+ animate_14B.vae_stride = (4, 8, 8)
19
+
20
+ # transformer
21
+ animate_14B.patch_size = (1, 2, 2)
22
+ animate_14B.dim = 5120
23
+ animate_14B.ffn_dim = 13824
24
+ animate_14B.freq_dim = 256
25
+ animate_14B.num_heads = 40
26
+ animate_14B.num_layers = 40
27
+ animate_14B.window_size = (-1, -1)
28
+ animate_14B.qk_norm = True
29
+ animate_14B.cross_attn_norm = True
30
+ animate_14B.eps = 1e-6
31
+ animate_14B.use_face_encoder = True
32
+ animate_14B.motion_encoder_dim = 512
33
+
34
+ # inference
35
+ animate_14B.sample_shift = 5.0
36
+ animate_14B.sample_steps = 20
37
+ animate_14B.sample_guide_scale = 1.0
38
+ animate_14B.frame_num = 77
39
+ animate_14B.sample_fps = 30
40
+ animate_14B.prompt = '视频中的人在做动作'
Wan2.2/wan/configs/wan_i2v_A14B.py ADDED
@@ -0,0 +1,37 @@
1
+ # Copyright 2024-2025 The Alibaba Wan Team Authors. All rights reserved.
2
+ import torch
3
+ from easydict import EasyDict
4
+
5
+ from .shared_config import wan_shared_cfg
6
+
7
+ #------------------------ Wan I2V A14B ------------------------#
8
+
9
+ i2v_A14B = EasyDict(__name__='Config: Wan I2V A14B')
10
+ i2v_A14B.update(wan_shared_cfg)
11
+
12
+ i2v_A14B.t5_checkpoint = 'models_t5_umt5-xxl-enc-bf16.pth'
13
+ i2v_A14B.t5_tokenizer = 'google/umt5-xxl'
14
+
15
+ # vae
16
+ i2v_A14B.vae_checkpoint = 'Wan2.1_VAE.pth'
17
+ i2v_A14B.vae_stride = (4, 8, 8)
18
+
19
+ # transformer
20
+ i2v_A14B.patch_size = (1, 2, 2)
21
+ i2v_A14B.dim = 5120
22
+ i2v_A14B.ffn_dim = 13824
23
+ i2v_A14B.freq_dim = 256
24
+ i2v_A14B.num_heads = 40
25
+ i2v_A14B.num_layers = 40
26
+ i2v_A14B.window_size = (-1, -1)
27
+ i2v_A14B.qk_norm = True
28
+ i2v_A14B.cross_attn_norm = True
29
+ i2v_A14B.eps = 1e-6
30
+ i2v_A14B.low_noise_checkpoint = 'low_noise_model'
31
+ i2v_A14B.high_noise_checkpoint = 'high_noise_model'
32
+
33
+ # inference
34
+ i2v_A14B.sample_shift = 5.0
35
+ i2v_A14B.sample_steps = 40
36
+ i2v_A14B.boundary = 0.900
37
+ i2v_A14B.sample_guide_scale = (3.5, 3.5) # low noise, high noise
Wan2.2/wan/configs/wan_s2v_14B.py ADDED
@@ -0,0 +1,59 @@
1
+ # Copyright 2024-2025 The Alibaba Wan Team Authors. All rights reserved.
2
+ from easydict import EasyDict
3
+
4
+ from .shared_config import wan_shared_cfg
5
+
6
+ #------------------------ Wan S2V 14B ------------------------#
7
+
8
+ s2v_14B = EasyDict(__name__='Config: Wan S2V 14B')
9
+ s2v_14B.update(wan_shared_cfg)
10
+
11
+ # t5
12
+ s2v_14B.t5_checkpoint = 'models_t5_umt5-xxl-enc-bf16.pth'
13
+ s2v_14B.t5_tokenizer = 'google/umt5-xxl'
14
+
15
+ # vae
16
+ s2v_14B.vae_checkpoint = 'Wan2.1_VAE.pth'
17
+ s2v_14B.vae_stride = (4, 8, 8)
18
+
19
+ # wav2vec
20
+ s2v_14B.wav2vec = "wav2vec2-large-xlsr-53-english"
21
+
22
+ s2v_14B.num_heads = 40
23
+ # transformer
24
+ s2v_14B.transformer = EasyDict(
25
+ __name__="Config: Transformer config for WanModel_S2V")
26
+ s2v_14B.transformer.patch_size = (1, 2, 2)
27
+ s2v_14B.transformer.dim = 5120
28
+ s2v_14B.transformer.ffn_dim = 13824
29
+ s2v_14B.transformer.freq_dim = 256
30
+ s2v_14B.transformer.num_heads = 40
31
+ s2v_14B.transformer.num_layers = 40
32
+ s2v_14B.transformer.window_size = (-1, -1)
33
+ s2v_14B.transformer.qk_norm = True
34
+ s2v_14B.transformer.cross_attn_norm = True
35
+ s2v_14B.transformer.eps = 1e-6
36
+ s2v_14B.transformer.enable_adain = True
37
+ s2v_14B.transformer.adain_mode = "attn_norm"
38
+ s2v_14B.transformer.audio_inject_layers = [
39
+ 0, 4, 8, 12, 16, 20, 24, 27, 30, 33, 36, 39
40
+ ]
41
+ s2v_14B.transformer.zero_init = True
42
+ s2v_14B.transformer.zero_timestep = True
43
+ s2v_14B.transformer.enable_motioner = False
44
+ s2v_14B.transformer.add_last_motion = True
45
+ s2v_14B.transformer.trainable_token = False
46
+ s2v_14B.transformer.enable_tsm = False
47
+ s2v_14B.transformer.enable_framepack = True
48
+ s2v_14B.transformer.framepack_drop_mode = 'padd'
49
+ s2v_14B.transformer.audio_dim = 1024
50
+
51
+ s2v_14B.transformer.motion_frames = 73
52
+ s2v_14B.transformer.cond_dim = 16
53
+
54
+ # inference
55
+ s2v_14B.sample_neg_prompt = "画面模糊,最差质量,画面模糊,细节模糊不清,情绪激动剧烈,手快速抖动,字幕,丑陋的,残缺的,多余的手指,画得不好的手部,画得不好的脸部,畸形的,毁容的,形态畸形的肢体,手指融合,静止不动的画面,杂乱的背景,三条腿,背景人很多,倒着走"
56
+ s2v_14B.drop_first_motion = True
57
+ s2v_14B.sample_shift = 3
58
+ s2v_14B.sample_steps = 40
59
+ s2v_14B.sample_guide_scale = 4.5
Wan2.2/wan/configs/wan_t2v_A14B.py ADDED
@@ -0,0 +1,37 @@
1
+ # Copyright 2024-2025 The Alibaba Wan Team Authors. All rights reserved.
2
+ from easydict import EasyDict
3
+
4
+ from .shared_config import wan_shared_cfg
5
+
6
+ #------------------------ Wan T2V A14B ------------------------#
7
+
8
+ t2v_A14B = EasyDict(__name__='Config: Wan T2V A14B')
9
+ t2v_A14B.update(wan_shared_cfg)
10
+
11
+ # t5
12
+ t2v_A14B.t5_checkpoint = 'models_t5_umt5-xxl-enc-bf16.pth'
13
+ t2v_A14B.t5_tokenizer = 'google/umt5-xxl'
14
+
15
+ # vae
16
+ t2v_A14B.vae_checkpoint = 'Wan2.1_VAE.pth'
17
+ t2v_A14B.vae_stride = (4, 8, 8)
18
+
19
+ # transformer
20
+ t2v_A14B.patch_size = (1, 2, 2)
21
+ t2v_A14B.dim = 5120
22
+ t2v_A14B.ffn_dim = 13824
23
+ t2v_A14B.freq_dim = 256
24
+ t2v_A14B.num_heads = 40
25
+ t2v_A14B.num_layers = 40
26
+ t2v_A14B.window_size = (-1, -1)
27
+ t2v_A14B.qk_norm = True
28
+ t2v_A14B.cross_attn_norm = True
29
+ t2v_A14B.eps = 1e-6
30
+ t2v_A14B.low_noise_checkpoint = 'low_noise_model'
31
+ t2v_A14B.high_noise_checkpoint = 'high_noise_model'
32
+
33
+ # inference
34
+ t2v_A14B.sample_shift = 12.0
35
+ t2v_A14B.sample_steps = 40
36
+ t2v_A14B.boundary = 0.875
37
+ t2v_A14B.sample_guide_scale = (3.0, 4.0) # low noise, high noise
Wan2.2/wan/configs/wan_ti2v_5B.py ADDED
@@ -0,0 +1,36 @@
1
+ # Copyright 2024-2025 The Alibaba Wan Team Authors. All rights reserved.
2
+ from easydict import EasyDict
3
+
4
+ from .shared_config import wan_shared_cfg
5
+
6
+ #------------------------ Wan TI2V 5B ------------------------#
7
+
8
+ ti2v_5B = EasyDict(__name__='Config: Wan TI2V 5B')
9
+ ti2v_5B.update(wan_shared_cfg)
10
+
11
+ # t5
12
+ ti2v_5B.t5_checkpoint = 'models_t5_umt5-xxl-enc-bf16.pth'
13
+ ti2v_5B.t5_tokenizer = 'google/umt5-xxl'
14
+
15
+ # vae
16
+ ti2v_5B.vae_checkpoint = 'Wan2.2_VAE.pth'
17
+ ti2v_5B.vae_stride = (4, 16, 16)
18
+
19
+ # transformer
20
+ ti2v_5B.patch_size = (1, 2, 2)
21
+ ti2v_5B.dim = 3072
22
+ ti2v_5B.ffn_dim = 14336
23
+ ti2v_5B.freq_dim = 256
24
+ ti2v_5B.num_heads = 24
25
+ ti2v_5B.num_layers = 30
26
+ ti2v_5B.window_size = (-1, -1)
27
+ ti2v_5B.qk_norm = True
28
+ ti2v_5B.cross_attn_norm = True
29
+ ti2v_5B.eps = 1e-6
30
+
31
+ # inference
32
+ ti2v_5B.sample_fps = 24
33
+ ti2v_5B.sample_shift = 5.0
34
+ ti2v_5B.sample_steps = 50
35
+ ti2v_5B.sample_guide_scale = 5.0
36
+ ti2v_5B.frame_num = 121
Wan2.2/wan/distributed/__init__.py ADDED
@@ -0,0 +1 @@
1
+ # Copyright 2024-2025 The Alibaba Wan Team Authors. All rights reserved.
Wan2.2/wan/distributed/fsdp.py ADDED
@@ -0,0 +1,45 @@
1
+ # Copyright 2024-2025 The Alibaba Wan Team Authors. All rights reserved.
2
+ import gc
3
+ from functools import partial
4
+
5
+ import torch
6
+ from torch.distributed.fsdp import FullyShardedDataParallel as FSDP
7
+ from torch.distributed.fsdp import MixedPrecision, ShardingStrategy
8
+ from torch.distributed.fsdp.wrap import lambda_auto_wrap_policy
9
+ from torch.distributed.utils import _free_storage
10
+
11
+
12
+ def shard_model(
13
+ model,
14
+ device_id,
15
+ param_dtype=torch.bfloat16,
16
+ reduce_dtype=torch.float32,
17
+ buffer_dtype=torch.float32,
18
+ process_group=None,
19
+ sharding_strategy=ShardingStrategy.FULL_SHARD,
20
+ sync_module_states=True,
21
+ use_lora=False
22
+ ):
23
+ model = FSDP(
24
+ module=model,
25
+ process_group=process_group,
26
+ sharding_strategy=sharding_strategy,
27
+ auto_wrap_policy=partial(
28
+ lambda_auto_wrap_policy, lambda_fn=lambda m: m in model.blocks),
29
+ mixed_precision=MixedPrecision(
30
+ param_dtype=param_dtype,
31
+ reduce_dtype=reduce_dtype,
32
+ buffer_dtype=buffer_dtype),
33
+ device_id=device_id,
34
+ sync_module_states=sync_module_states,
35
+ use_orig_params=True if use_lora else False)
36
+ return model
37
+
38
+
39
+ def free_model(model):
40
+ for m in model.modules():
41
+ if isinstance(m, FSDP):
42
+ _free_storage(m._handle.flat_param.data)
43
+ del model
44
+ gc.collect()
45
+ torch.cuda.empty_cache()