Watay commited on
Commit
8c78ded
·
verified ·
1 Parent(s): 9f9d15d

Add files using upload-large-folder tool

Browse files
14b_i2v_1step_transformer/self_forcing_generator_bf16-00001-of-00006.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5f52162017853e4545a97bcf15194723754129b308f9a4982825256c4669546f
3
+ size 5339173691
14b_i2v_1step_transformer/self_forcing_generator_bf16-00002-of-00006.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b3c7691f12b6245dc1876346e433a469c8cd6eb09f08502787094a4cc999f4ce
3
+ size 5360146217
14b_i2v_1step_transformer/self_forcing_generator_bf16-00003-of-00006.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3ec1a22add8f0eaa8e25cad48a02c8340a72ce5cdc646cece9e9d9089235016b
3
+ size 5323391436
14b_i2v_1step_transformer/self_forcing_generator_bf16-00004-of-00006.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4ae67fb501d39e1e86796498f0929cf55f2eab2005a5e98c6fefc35ef3863ca2
3
+ size 5323478173
14b_i2v_1step_transformer/self_forcing_generator_bf16-00005-of-00006.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2fb79c7723d80e9f59c8aeb88b2b4fc7983225c767c0103b72936a1fe8307c64
3
+ size 5360163473
14b_i2v_1step_transformer/self_forcing_generator_bf16-00006-of-00006.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0e63eb7990294b89a7bfdcbff2f2102068109d9fedf3a4fafc76389c66985825
3
+ size 1871008373
14b_i2v_2step_transformer/self_forcing_generator_bf16-00001-of-00006.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:004fc2aab223dbcf136aafc524ed8ac7b4b502c00464817c75054a3ff9c0c580
3
+ size 5339173691
14b_i2v_2step_transformer/self_forcing_generator_bf16-00002-of-00006.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:058b022f5576b6eba408eaa15593f327d2e5328fa8868b00698fad8ad44efcfe
3
+ size 5360146217
14b_i2v_2step_transformer/self_forcing_generator_bf16-00003-of-00006.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7dce5dc0e1a44e79cccce4e357b50bf0cfe512ce1cfebe82a2a1ee92ac712909
3
+ size 5323391436
14b_i2v_2step_transformer/self_forcing_generator_bf16-00004-of-00006.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5db5105f3f64b0073c8b732e00b13a91198e712e613f4285a7c7b1842b5cb946
3
+ size 5323478173
14b_i2v_2step_transformer/self_forcing_generator_bf16-00005-of-00006.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a2af772a95b1a798fa39dcecdb4dec38ea947128463bdcc0efcec10aa2c98f9d
3
+ size 5360163473
14b_i2v_2step_transformer/self_forcing_generator_bf16-00006-of-00006.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f1e3d9717681c908492b3f1a9e7b304dc9b0d1e7715f010689f8e486be34d481
3
+ size 1871008373
14b_i2v_2step_transformer/self_forcing_generator_bf16.index.json ADDED
@@ -0,0 +1,1110 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "metadata": {
3
+ "converter_version": "standalone-1.0",
4
+ "generated_at": "2026-05-30T06:49:15.484750+00:00",
5
+ "max_shard_size_bytes": 5368709120,
6
+ "num_shards": 6,
7
+ "source_config_path": "/Users/lihaobo/dev/aad/fastvideo/ckpts/14b_i2v_2step_transformer/config.json",
8
+ "source_model_class": "CausalWanTransformer3DModel",
9
+ "source_safetensors_format": "sharded_index",
10
+ "source_safetensors_path": "/Users/lihaobo/dev/aad/fastvideo/ckpts/14b_i2v_2step_transformer/diffusion_pytorch_model.safetensors.index.json",
11
+ "target_dtype": "bf16"
12
+ },
13
+ "weight_map": {
14
+ "blocks.0.cross_attn.k.bias": "self_forcing_generator_bf16-00001-of-00006.pt",
15
+ "blocks.0.cross_attn.k.weight": "self_forcing_generator_bf16-00001-of-00006.pt",
16
+ "blocks.0.cross_attn.norm_k.weight": "self_forcing_generator_bf16-00001-of-00006.pt",
17
+ "blocks.0.cross_attn.norm_q.weight": "self_forcing_generator_bf16-00001-of-00006.pt",
18
+ "blocks.0.cross_attn.o.bias": "self_forcing_generator_bf16-00001-of-00006.pt",
19
+ "blocks.0.cross_attn.o.weight": "self_forcing_generator_bf16-00001-of-00006.pt",
20
+ "blocks.0.cross_attn.q.bias": "self_forcing_generator_bf16-00001-of-00006.pt",
21
+ "blocks.0.cross_attn.q.weight": "self_forcing_generator_bf16-00001-of-00006.pt",
22
+ "blocks.0.cross_attn.v.bias": "self_forcing_generator_bf16-00001-of-00006.pt",
23
+ "blocks.0.cross_attn.v.weight": "self_forcing_generator_bf16-00001-of-00006.pt",
24
+ "blocks.0.ffn.0.bias": "self_forcing_generator_bf16-00001-of-00006.pt",
25
+ "blocks.0.ffn.0.weight": "self_forcing_generator_bf16-00001-of-00006.pt",
26
+ "blocks.0.ffn.2.bias": "self_forcing_generator_bf16-00001-of-00006.pt",
27
+ "blocks.0.ffn.2.weight": "self_forcing_generator_bf16-00001-of-00006.pt",
28
+ "blocks.0.modulation": "self_forcing_generator_bf16-00001-of-00006.pt",
29
+ "blocks.0.norm3.bias": "self_forcing_generator_bf16-00001-of-00006.pt",
30
+ "blocks.0.norm3.weight": "self_forcing_generator_bf16-00001-of-00006.pt",
31
+ "blocks.0.self_attn.k.bias": "self_forcing_generator_bf16-00001-of-00006.pt",
32
+ "blocks.0.self_attn.k.weight": "self_forcing_generator_bf16-00001-of-00006.pt",
33
+ "blocks.0.self_attn.norm_k.weight": "self_forcing_generator_bf16-00001-of-00006.pt",
34
+ "blocks.0.self_attn.norm_q.weight": "self_forcing_generator_bf16-00001-of-00006.pt",
35
+ "blocks.0.self_attn.o.bias": "self_forcing_generator_bf16-00001-of-00006.pt",
36
+ "blocks.0.self_attn.o.weight": "self_forcing_generator_bf16-00001-of-00006.pt",
37
+ "blocks.0.self_attn.q.bias": "self_forcing_generator_bf16-00001-of-00006.pt",
38
+ "blocks.0.self_attn.q.weight": "self_forcing_generator_bf16-00001-of-00006.pt",
39
+ "blocks.0.self_attn.v.bias": "self_forcing_generator_bf16-00001-of-00006.pt",
40
+ "blocks.0.self_attn.v.weight": "self_forcing_generator_bf16-00001-of-00006.pt",
41
+ "blocks.1.cross_attn.k.bias": "self_forcing_generator_bf16-00001-of-00006.pt",
42
+ "blocks.1.cross_attn.k.weight": "self_forcing_generator_bf16-00001-of-00006.pt",
43
+ "blocks.1.cross_attn.norm_k.weight": "self_forcing_generator_bf16-00001-of-00006.pt",
44
+ "blocks.1.cross_attn.norm_q.weight": "self_forcing_generator_bf16-00001-of-00006.pt",
45
+ "blocks.1.cross_attn.o.bias": "self_forcing_generator_bf16-00001-of-00006.pt",
46
+ "blocks.1.cross_attn.o.weight": "self_forcing_generator_bf16-00001-of-00006.pt",
47
+ "blocks.1.cross_attn.q.bias": "self_forcing_generator_bf16-00001-of-00006.pt",
48
+ "blocks.1.cross_attn.q.weight": "self_forcing_generator_bf16-00001-of-00006.pt",
49
+ "blocks.1.cross_attn.v.bias": "self_forcing_generator_bf16-00001-of-00006.pt",
50
+ "blocks.1.cross_attn.v.weight": "self_forcing_generator_bf16-00001-of-00006.pt",
51
+ "blocks.1.ffn.0.bias": "self_forcing_generator_bf16-00001-of-00006.pt",
52
+ "blocks.1.ffn.0.weight": "self_forcing_generator_bf16-00001-of-00006.pt",
53
+ "blocks.1.ffn.2.bias": "self_forcing_generator_bf16-00001-of-00006.pt",
54
+ "blocks.1.ffn.2.weight": "self_forcing_generator_bf16-00001-of-00006.pt",
55
+ "blocks.1.modulation": "self_forcing_generator_bf16-00001-of-00006.pt",
56
+ "blocks.1.norm3.bias": "self_forcing_generator_bf16-00001-of-00006.pt",
57
+ "blocks.1.norm3.weight": "self_forcing_generator_bf16-00001-of-00006.pt",
58
+ "blocks.1.self_attn.k.bias": "self_forcing_generator_bf16-00001-of-00006.pt",
59
+ "blocks.1.self_attn.k.weight": "self_forcing_generator_bf16-00001-of-00006.pt",
60
+ "blocks.1.self_attn.norm_k.weight": "self_forcing_generator_bf16-00001-of-00006.pt",
61
+ "blocks.1.self_attn.norm_q.weight": "self_forcing_generator_bf16-00001-of-00006.pt",
62
+ "blocks.1.self_attn.o.bias": "self_forcing_generator_bf16-00001-of-00006.pt",
63
+ "blocks.1.self_attn.o.weight": "self_forcing_generator_bf16-00001-of-00006.pt",
64
+ "blocks.1.self_attn.q.bias": "self_forcing_generator_bf16-00001-of-00006.pt",
65
+ "blocks.1.self_attn.q.weight": "self_forcing_generator_bf16-00001-of-00006.pt",
66
+ "blocks.1.self_attn.v.bias": "self_forcing_generator_bf16-00001-of-00006.pt",
67
+ "blocks.1.self_attn.v.weight": "self_forcing_generator_bf16-00001-of-00006.pt",
68
+ "blocks.10.cross_attn.k.bias": "self_forcing_generator_bf16-00001-of-00006.pt",
69
+ "blocks.10.cross_attn.k.weight": "self_forcing_generator_bf16-00001-of-00006.pt",
70
+ "blocks.10.cross_attn.norm_k.weight": "self_forcing_generator_bf16-00001-of-00006.pt",
71
+ "blocks.10.cross_attn.norm_q.weight": "self_forcing_generator_bf16-00001-of-00006.pt",
72
+ "blocks.10.cross_attn.o.bias": "self_forcing_generator_bf16-00001-of-00006.pt",
73
+ "blocks.10.cross_attn.o.weight": "self_forcing_generator_bf16-00001-of-00006.pt",
74
+ "blocks.10.cross_attn.q.bias": "self_forcing_generator_bf16-00001-of-00006.pt",
75
+ "blocks.10.cross_attn.q.weight": "self_forcing_generator_bf16-00001-of-00006.pt",
76
+ "blocks.10.cross_attn.v.bias": "self_forcing_generator_bf16-00001-of-00006.pt",
77
+ "blocks.10.cross_attn.v.weight": "self_forcing_generator_bf16-00001-of-00006.pt",
78
+ "blocks.10.ffn.0.bias": "self_forcing_generator_bf16-00001-of-00006.pt",
79
+ "blocks.10.ffn.0.weight": "self_forcing_generator_bf16-00001-of-00006.pt",
80
+ "blocks.10.ffn.2.bias": "self_forcing_generator_bf16-00001-of-00006.pt",
81
+ "blocks.10.ffn.2.weight": "self_forcing_generator_bf16-00001-of-00006.pt",
82
+ "blocks.10.modulation": "self_forcing_generator_bf16-00001-of-00006.pt",
83
+ "blocks.10.norm3.bias": "self_forcing_generator_bf16-00001-of-00006.pt",
84
+ "blocks.10.norm3.weight": "self_forcing_generator_bf16-00001-of-00006.pt",
85
+ "blocks.10.self_attn.k.bias": "self_forcing_generator_bf16-00001-of-00006.pt",
86
+ "blocks.10.self_attn.k.weight": "self_forcing_generator_bf16-00001-of-00006.pt",
87
+ "blocks.10.self_attn.norm_k.weight": "self_forcing_generator_bf16-00001-of-00006.pt",
88
+ "blocks.10.self_attn.norm_q.weight": "self_forcing_generator_bf16-00001-of-00006.pt",
89
+ "blocks.10.self_attn.o.bias": "self_forcing_generator_bf16-00001-of-00006.pt",
90
+ "blocks.10.self_attn.o.weight": "self_forcing_generator_bf16-00001-of-00006.pt",
91
+ "blocks.10.self_attn.q.bias": "self_forcing_generator_bf16-00001-of-00006.pt",
92
+ "blocks.10.self_attn.q.weight": "self_forcing_generator_bf16-00001-of-00006.pt",
93
+ "blocks.10.self_attn.v.bias": "self_forcing_generator_bf16-00001-of-00006.pt",
94
+ "blocks.10.self_attn.v.weight": "self_forcing_generator_bf16-00001-of-00006.pt",
95
+ "blocks.11.cross_attn.k.bias": "self_forcing_generator_bf16-00001-of-00006.pt",
96
+ "blocks.11.cross_attn.k.weight": "self_forcing_generator_bf16-00001-of-00006.pt",
97
+ "blocks.11.cross_attn.norm_k.weight": "self_forcing_generator_bf16-00001-of-00006.pt",
98
+ "blocks.11.cross_attn.norm_q.weight": "self_forcing_generator_bf16-00001-of-00006.pt",
99
+ "blocks.11.cross_attn.o.bias": "self_forcing_generator_bf16-00001-of-00006.pt",
100
+ "blocks.11.cross_attn.o.weight": "self_forcing_generator_bf16-00001-of-00006.pt",
101
+ "blocks.11.cross_attn.q.bias": "self_forcing_generator_bf16-00001-of-00006.pt",
102
+ "blocks.11.cross_attn.q.weight": "self_forcing_generator_bf16-00001-of-00006.pt",
103
+ "blocks.11.cross_attn.v.bias": "self_forcing_generator_bf16-00001-of-00006.pt",
104
+ "blocks.11.cross_attn.v.weight": "self_forcing_generator_bf16-00001-of-00006.pt",
105
+ "blocks.11.ffn.0.bias": "self_forcing_generator_bf16-00001-of-00006.pt",
106
+ "blocks.11.ffn.0.weight": "self_forcing_generator_bf16-00001-of-00006.pt",
107
+ "blocks.11.ffn.2.bias": "self_forcing_generator_bf16-00001-of-00006.pt",
108
+ "blocks.11.ffn.2.weight": "self_forcing_generator_bf16-00001-of-00006.pt",
109
+ "blocks.11.modulation": "self_forcing_generator_bf16-00001-of-00006.pt",
110
+ "blocks.11.norm3.bias": "self_forcing_generator_bf16-00001-of-00006.pt",
111
+ "blocks.11.norm3.weight": "self_forcing_generator_bf16-00001-of-00006.pt",
112
+ "blocks.11.self_attn.k.bias": "self_forcing_generator_bf16-00001-of-00006.pt",
113
+ "blocks.11.self_attn.k.weight": "self_forcing_generator_bf16-00001-of-00006.pt",
114
+ "blocks.11.self_attn.norm_k.weight": "self_forcing_generator_bf16-00001-of-00006.pt",
115
+ "blocks.11.self_attn.norm_q.weight": "self_forcing_generator_bf16-00001-of-00006.pt",
116
+ "blocks.11.self_attn.o.bias": "self_forcing_generator_bf16-00001-of-00006.pt",
117
+ "blocks.11.self_attn.o.weight": "self_forcing_generator_bf16-00001-of-00006.pt",
118
+ "blocks.11.self_attn.q.bias": "self_forcing_generator_bf16-00001-of-00006.pt",
119
+ "blocks.11.self_attn.q.weight": "self_forcing_generator_bf16-00001-of-00006.pt",
120
+ "blocks.11.self_attn.v.bias": "self_forcing_generator_bf16-00001-of-00006.pt",
121
+ "blocks.11.self_attn.v.weight": "self_forcing_generator_bf16-00001-of-00006.pt",
122
+ "blocks.12.cross_attn.k.bias": "self_forcing_generator_bf16-00001-of-00006.pt",
123
+ "blocks.12.cross_attn.k.weight": "self_forcing_generator_bf16-00001-of-00006.pt",
124
+ "blocks.12.cross_attn.norm_k.weight": "self_forcing_generator_bf16-00001-of-00006.pt",
125
+ "blocks.12.cross_attn.norm_q.weight": "self_forcing_generator_bf16-00001-of-00006.pt",
126
+ "blocks.12.cross_attn.o.bias": "self_forcing_generator_bf16-00001-of-00006.pt",
127
+ "blocks.12.cross_attn.o.weight": "self_forcing_generator_bf16-00001-of-00006.pt",
128
+ "blocks.12.cross_attn.q.bias": "self_forcing_generator_bf16-00001-of-00006.pt",
129
+ "blocks.12.cross_attn.q.weight": "self_forcing_generator_bf16-00001-of-00006.pt",
130
+ "blocks.12.cross_attn.v.bias": "self_forcing_generator_bf16-00001-of-00006.pt",
131
+ "blocks.12.cross_attn.v.weight": "self_forcing_generator_bf16-00001-of-00006.pt",
132
+ "blocks.12.ffn.0.bias": "self_forcing_generator_bf16-00001-of-00006.pt",
133
+ "blocks.12.ffn.0.weight": "self_forcing_generator_bf16-00001-of-00006.pt",
134
+ "blocks.12.ffn.2.bias": "self_forcing_generator_bf16-00001-of-00006.pt",
135
+ "blocks.12.ffn.2.weight": "self_forcing_generator_bf16-00001-of-00006.pt",
136
+ "blocks.12.modulation": "self_forcing_generator_bf16-00001-of-00006.pt",
137
+ "blocks.12.norm3.bias": "self_forcing_generator_bf16-00001-of-00006.pt",
138
+ "blocks.12.norm3.weight": "self_forcing_generator_bf16-00001-of-00006.pt",
139
+ "blocks.12.self_attn.k.bias": "self_forcing_generator_bf16-00001-of-00006.pt",
140
+ "blocks.12.self_attn.k.weight": "self_forcing_generator_bf16-00001-of-00006.pt",
141
+ "blocks.12.self_attn.norm_k.weight": "self_forcing_generator_bf16-00001-of-00006.pt",
142
+ "blocks.12.self_attn.norm_q.weight": "self_forcing_generator_bf16-00001-of-00006.pt",
143
+ "blocks.12.self_attn.o.bias": "self_forcing_generator_bf16-00001-of-00006.pt",
144
+ "blocks.12.self_attn.o.weight": "self_forcing_generator_bf16-00001-of-00006.pt",
145
+ "blocks.12.self_attn.q.bias": "self_forcing_generator_bf16-00001-of-00006.pt",
146
+ "blocks.12.self_attn.q.weight": "self_forcing_generator_bf16-00001-of-00006.pt",
147
+ "blocks.12.self_attn.v.bias": "self_forcing_generator_bf16-00001-of-00006.pt",
148
+ "blocks.12.self_attn.v.weight": "self_forcing_generator_bf16-00001-of-00006.pt",
149
+ "blocks.13.cross_attn.k.bias": "self_forcing_generator_bf16-00001-of-00006.pt",
150
+ "blocks.13.cross_attn.k.weight": "self_forcing_generator_bf16-00001-of-00006.pt",
151
+ "blocks.13.cross_attn.norm_k.weight": "self_forcing_generator_bf16-00001-of-00006.pt",
152
+ "blocks.13.cross_attn.norm_q.weight": "self_forcing_generator_bf16-00001-of-00006.pt",
153
+ "blocks.13.cross_attn.o.bias": "self_forcing_generator_bf16-00001-of-00006.pt",
154
+ "blocks.13.cross_attn.o.weight": "self_forcing_generator_bf16-00001-of-00006.pt",
155
+ "blocks.13.cross_attn.q.bias": "self_forcing_generator_bf16-00001-of-00006.pt",
156
+ "blocks.13.cross_attn.q.weight": "self_forcing_generator_bf16-00001-of-00006.pt",
157
+ "blocks.13.cross_attn.v.bias": "self_forcing_generator_bf16-00001-of-00006.pt",
158
+ "blocks.13.cross_attn.v.weight": "self_forcing_generator_bf16-00001-of-00006.pt",
159
+ "blocks.13.ffn.0.bias": "self_forcing_generator_bf16-00001-of-00006.pt",
160
+ "blocks.13.ffn.0.weight": "self_forcing_generator_bf16-00001-of-00006.pt",
161
+ "blocks.13.ffn.2.bias": "self_forcing_generator_bf16-00001-of-00006.pt",
162
+ "blocks.13.ffn.2.weight": "self_forcing_generator_bf16-00001-of-00006.pt",
163
+ "blocks.13.modulation": "self_forcing_generator_bf16-00001-of-00006.pt",
164
+ "blocks.13.norm3.bias": "self_forcing_generator_bf16-00001-of-00006.pt",
165
+ "blocks.13.norm3.weight": "self_forcing_generator_bf16-00001-of-00006.pt",
166
+ "blocks.13.self_attn.k.bias": "self_forcing_generator_bf16-00001-of-00006.pt",
167
+ "blocks.13.self_attn.k.weight": "self_forcing_generator_bf16-00001-of-00006.pt",
168
+ "blocks.13.self_attn.norm_k.weight": "self_forcing_generator_bf16-00001-of-00006.pt",
169
+ "blocks.13.self_attn.norm_q.weight": "self_forcing_generator_bf16-00001-of-00006.pt",
170
+ "blocks.13.self_attn.o.bias": "self_forcing_generator_bf16-00001-of-00006.pt",
171
+ "blocks.13.self_attn.o.weight": "self_forcing_generator_bf16-00001-of-00006.pt",
172
+ "blocks.13.self_attn.q.bias": "self_forcing_generator_bf16-00001-of-00006.pt",
173
+ "blocks.13.self_attn.q.weight": "self_forcing_generator_bf16-00001-of-00006.pt",
174
+ "blocks.13.self_attn.v.bias": "self_forcing_generator_bf16-00001-of-00006.pt",
175
+ "blocks.13.self_attn.v.weight": "self_forcing_generator_bf16-00001-of-00006.pt",
176
+ "blocks.14.cross_attn.k.bias": "self_forcing_generator_bf16-00001-of-00006.pt",
177
+ "blocks.14.cross_attn.k.weight": "self_forcing_generator_bf16-00001-of-00006.pt",
178
+ "blocks.14.cross_attn.norm_k.weight": "self_forcing_generator_bf16-00001-of-00006.pt",
179
+ "blocks.14.cross_attn.norm_q.weight": "self_forcing_generator_bf16-00001-of-00006.pt",
180
+ "blocks.14.cross_attn.o.bias": "self_forcing_generator_bf16-00001-of-00006.pt",
181
+ "blocks.14.cross_attn.o.weight": "self_forcing_generator_bf16-00001-of-00006.pt",
182
+ "blocks.14.cross_attn.q.bias": "self_forcing_generator_bf16-00001-of-00006.pt",
183
+ "blocks.14.cross_attn.q.weight": "self_forcing_generator_bf16-00001-of-00006.pt",
184
+ "blocks.14.cross_attn.v.bias": "self_forcing_generator_bf16-00001-of-00006.pt",
185
+ "blocks.14.cross_attn.v.weight": "self_forcing_generator_bf16-00001-of-00006.pt",
186
+ "blocks.14.ffn.0.bias": "self_forcing_generator_bf16-00001-of-00006.pt",
187
+ "blocks.14.ffn.0.weight": "self_forcing_generator_bf16-00001-of-00006.pt",
188
+ "blocks.14.ffn.2.bias": "self_forcing_generator_bf16-00001-of-00006.pt",
189
+ "blocks.14.ffn.2.weight": "self_forcing_generator_bf16-00001-of-00006.pt",
190
+ "blocks.14.modulation": "self_forcing_generator_bf16-00001-of-00006.pt",
191
+ "blocks.14.norm3.bias": "self_forcing_generator_bf16-00001-of-00006.pt",
192
+ "blocks.14.norm3.weight": "self_forcing_generator_bf16-00001-of-00006.pt",
193
+ "blocks.14.self_attn.k.bias": "self_forcing_generator_bf16-00001-of-00006.pt",
194
+ "blocks.14.self_attn.k.weight": "self_forcing_generator_bf16-00001-of-00006.pt",
195
+ "blocks.14.self_attn.norm_k.weight": "self_forcing_generator_bf16-00001-of-00006.pt",
196
+ "blocks.14.self_attn.norm_q.weight": "self_forcing_generator_bf16-00001-of-00006.pt",
197
+ "blocks.14.self_attn.o.bias": "self_forcing_generator_bf16-00001-of-00006.pt",
198
+ "blocks.14.self_attn.o.weight": "self_forcing_generator_bf16-00001-of-00006.pt",
199
+ "blocks.14.self_attn.q.bias": "self_forcing_generator_bf16-00001-of-00006.pt",
200
+ "blocks.14.self_attn.q.weight": "self_forcing_generator_bf16-00001-of-00006.pt",
201
+ "blocks.14.self_attn.v.bias": "self_forcing_generator_bf16-00001-of-00006.pt",
202
+ "blocks.14.self_attn.v.weight": "self_forcing_generator_bf16-00001-of-00006.pt",
203
+ "blocks.15.cross_attn.k.bias": "self_forcing_generator_bf16-00001-of-00006.pt",
204
+ "blocks.15.cross_attn.k.weight": "self_forcing_generator_bf16-00001-of-00006.pt",
205
+ "blocks.15.cross_attn.norm_k.weight": "self_forcing_generator_bf16-00001-of-00006.pt",
206
+ "blocks.15.cross_attn.norm_q.weight": "self_forcing_generator_bf16-00001-of-00006.pt",
207
+ "blocks.15.cross_attn.o.bias": "self_forcing_generator_bf16-00001-of-00006.pt",
208
+ "blocks.15.cross_attn.o.weight": "self_forcing_generator_bf16-00001-of-00006.pt",
209
+ "blocks.15.cross_attn.q.bias": "self_forcing_generator_bf16-00001-of-00006.pt",
210
+ "blocks.15.cross_attn.q.weight": "self_forcing_generator_bf16-00001-of-00006.pt",
211
+ "blocks.15.cross_attn.v.bias": "self_forcing_generator_bf16-00001-of-00006.pt",
212
+ "blocks.15.cross_attn.v.weight": "self_forcing_generator_bf16-00001-of-00006.pt",
213
+ "blocks.15.ffn.0.bias": "self_forcing_generator_bf16-00001-of-00006.pt",
214
+ "blocks.15.ffn.0.weight": "self_forcing_generator_bf16-00002-of-00006.pt",
215
+ "blocks.15.ffn.2.bias": "self_forcing_generator_bf16-00002-of-00006.pt",
216
+ "blocks.15.ffn.2.weight": "self_forcing_generator_bf16-00002-of-00006.pt",
217
+ "blocks.15.modulation": "self_forcing_generator_bf16-00002-of-00006.pt",
218
+ "blocks.15.norm3.bias": "self_forcing_generator_bf16-00002-of-00006.pt",
219
+ "blocks.15.norm3.weight": "self_forcing_generator_bf16-00002-of-00006.pt",
220
+ "blocks.15.self_attn.k.bias": "self_forcing_generator_bf16-00001-of-00006.pt",
221
+ "blocks.15.self_attn.k.weight": "self_forcing_generator_bf16-00001-of-00006.pt",
222
+ "blocks.15.self_attn.norm_k.weight": "self_forcing_generator_bf16-00001-of-00006.pt",
223
+ "blocks.15.self_attn.norm_q.weight": "self_forcing_generator_bf16-00001-of-00006.pt",
224
+ "blocks.15.self_attn.o.bias": "self_forcing_generator_bf16-00001-of-00006.pt",
225
+ "blocks.15.self_attn.o.weight": "self_forcing_generator_bf16-00001-of-00006.pt",
226
+ "blocks.15.self_attn.q.bias": "self_forcing_generator_bf16-00001-of-00006.pt",
227
+ "blocks.15.self_attn.q.weight": "self_forcing_generator_bf16-00001-of-00006.pt",
228
+ "blocks.15.self_attn.v.bias": "self_forcing_generator_bf16-00001-of-00006.pt",
229
+ "blocks.15.self_attn.v.weight": "self_forcing_generator_bf16-00001-of-00006.pt",
230
+ "blocks.16.cross_attn.k.bias": "self_forcing_generator_bf16-00002-of-00006.pt",
231
+ "blocks.16.cross_attn.k.weight": "self_forcing_generator_bf16-00002-of-00006.pt",
232
+ "blocks.16.cross_attn.norm_k.weight": "self_forcing_generator_bf16-00002-of-00006.pt",
233
+ "blocks.16.cross_attn.norm_q.weight": "self_forcing_generator_bf16-00002-of-00006.pt",
234
+ "blocks.16.cross_attn.o.bias": "self_forcing_generator_bf16-00002-of-00006.pt",
235
+ "blocks.16.cross_attn.o.weight": "self_forcing_generator_bf16-00002-of-00006.pt",
236
+ "blocks.16.cross_attn.q.bias": "self_forcing_generator_bf16-00002-of-00006.pt",
237
+ "blocks.16.cross_attn.q.weight": "self_forcing_generator_bf16-00002-of-00006.pt",
238
+ "blocks.16.cross_attn.v.bias": "self_forcing_generator_bf16-00002-of-00006.pt",
239
+ "blocks.16.cross_attn.v.weight": "self_forcing_generator_bf16-00002-of-00006.pt",
240
+ "blocks.16.ffn.0.bias": "self_forcing_generator_bf16-00002-of-00006.pt",
241
+ "blocks.16.ffn.0.weight": "self_forcing_generator_bf16-00002-of-00006.pt",
242
+ "blocks.16.ffn.2.bias": "self_forcing_generator_bf16-00002-of-00006.pt",
243
+ "blocks.16.ffn.2.weight": "self_forcing_generator_bf16-00002-of-00006.pt",
244
+ "blocks.16.modulation": "self_forcing_generator_bf16-00002-of-00006.pt",
245
+ "blocks.16.norm3.bias": "self_forcing_generator_bf16-00002-of-00006.pt",
246
+ "blocks.16.norm3.weight": "self_forcing_generator_bf16-00002-of-00006.pt",
247
+ "blocks.16.self_attn.k.bias": "self_forcing_generator_bf16-00002-of-00006.pt",
248
+ "blocks.16.self_attn.k.weight": "self_forcing_generator_bf16-00002-of-00006.pt",
249
+ "blocks.16.self_attn.norm_k.weight": "self_forcing_generator_bf16-00002-of-00006.pt",
250
+ "blocks.16.self_attn.norm_q.weight": "self_forcing_generator_bf16-00002-of-00006.pt",
251
+ "blocks.16.self_attn.o.bias": "self_forcing_generator_bf16-00002-of-00006.pt",
252
+ "blocks.16.self_attn.o.weight": "self_forcing_generator_bf16-00002-of-00006.pt",
253
+ "blocks.16.self_attn.q.bias": "self_forcing_generator_bf16-00002-of-00006.pt",
254
+ "blocks.16.self_attn.q.weight": "self_forcing_generator_bf16-00002-of-00006.pt",
255
+ "blocks.16.self_attn.v.bias": "self_forcing_generator_bf16-00002-of-00006.pt",
256
+ "blocks.16.self_attn.v.weight": "self_forcing_generator_bf16-00002-of-00006.pt",
257
+ "blocks.17.cross_attn.k.bias": "self_forcing_generator_bf16-00002-of-00006.pt",
258
+ "blocks.17.cross_attn.k.weight": "self_forcing_generator_bf16-00002-of-00006.pt",
259
+ "blocks.17.cross_attn.norm_k.weight": "self_forcing_generator_bf16-00002-of-00006.pt",
260
+ "blocks.17.cross_attn.norm_q.weight": "self_forcing_generator_bf16-00002-of-00006.pt",
261
+ "blocks.17.cross_attn.o.bias": "self_forcing_generator_bf16-00002-of-00006.pt",
262
+ "blocks.17.cross_attn.o.weight": "self_forcing_generator_bf16-00002-of-00006.pt",
263
+ "blocks.17.cross_attn.q.bias": "self_forcing_generator_bf16-00002-of-00006.pt",
264
+ "blocks.17.cross_attn.q.weight": "self_forcing_generator_bf16-00002-of-00006.pt",
265
+ "blocks.17.cross_attn.v.bias": "self_forcing_generator_bf16-00002-of-00006.pt",
266
+ "blocks.17.cross_attn.v.weight": "self_forcing_generator_bf16-00002-of-00006.pt",
267
+ "blocks.17.ffn.0.bias": "self_forcing_generator_bf16-00002-of-00006.pt",
268
+ "blocks.17.ffn.0.weight": "self_forcing_generator_bf16-00002-of-00006.pt",
269
+ "blocks.17.ffn.2.bias": "self_forcing_generator_bf16-00002-of-00006.pt",
270
+ "blocks.17.ffn.2.weight": "self_forcing_generator_bf16-00002-of-00006.pt",
271
+ "blocks.17.modulation": "self_forcing_generator_bf16-00002-of-00006.pt",
272
+ "blocks.17.norm3.bias": "self_forcing_generator_bf16-00002-of-00006.pt",
273
+ "blocks.17.norm3.weight": "self_forcing_generator_bf16-00002-of-00006.pt",
274
+ "blocks.17.self_attn.k.bias": "self_forcing_generator_bf16-00002-of-00006.pt",
275
+ "blocks.17.self_attn.k.weight": "self_forcing_generator_bf16-00002-of-00006.pt",
276
+ "blocks.17.self_attn.norm_k.weight": "self_forcing_generator_bf16-00002-of-00006.pt",
277
+ "blocks.17.self_attn.norm_q.weight": "self_forcing_generator_bf16-00002-of-00006.pt",
278
+ "blocks.17.self_attn.o.bias": "self_forcing_generator_bf16-00002-of-00006.pt",
279
+ "blocks.17.self_attn.o.weight": "self_forcing_generator_bf16-00002-of-00006.pt",
280
+ "blocks.17.self_attn.q.bias": "self_forcing_generator_bf16-00002-of-00006.pt",
281
+ "blocks.17.self_attn.q.weight": "self_forcing_generator_bf16-00002-of-00006.pt",
282
+ "blocks.17.self_attn.v.bias": "self_forcing_generator_bf16-00002-of-00006.pt",
283
+ "blocks.17.self_attn.v.weight": "self_forcing_generator_bf16-00002-of-00006.pt",
284
+ "blocks.18.cross_attn.k.bias": "self_forcing_generator_bf16-00002-of-00006.pt",
285
+ "blocks.18.cross_attn.k.weight": "self_forcing_generator_bf16-00002-of-00006.pt",
286
+ "blocks.18.cross_attn.norm_k.weight": "self_forcing_generator_bf16-00002-of-00006.pt",
287
+ "blocks.18.cross_attn.norm_q.weight": "self_forcing_generator_bf16-00002-of-00006.pt",
288
+ "blocks.18.cross_attn.o.bias": "self_forcing_generator_bf16-00002-of-00006.pt",
289
+ "blocks.18.cross_attn.o.weight": "self_forcing_generator_bf16-00002-of-00006.pt",
290
+ "blocks.18.cross_attn.q.bias": "self_forcing_generator_bf16-00002-of-00006.pt",
291
+ "blocks.18.cross_attn.q.weight": "self_forcing_generator_bf16-00002-of-00006.pt",
292
+ "blocks.18.cross_attn.v.bias": "self_forcing_generator_bf16-00002-of-00006.pt",
293
+ "blocks.18.cross_attn.v.weight": "self_forcing_generator_bf16-00002-of-00006.pt",
294
+ "blocks.18.ffn.0.bias": "self_forcing_generator_bf16-00002-of-00006.pt",
295
+ "blocks.18.ffn.0.weight": "self_forcing_generator_bf16-00002-of-00006.pt",
296
+ "blocks.18.ffn.2.bias": "self_forcing_generator_bf16-00002-of-00006.pt",
297
+ "blocks.18.ffn.2.weight": "self_forcing_generator_bf16-00002-of-00006.pt",
298
+ "blocks.18.modulation": "self_forcing_generator_bf16-00002-of-00006.pt",
299
+ "blocks.18.norm3.bias": "self_forcing_generator_bf16-00002-of-00006.pt",
300
+ "blocks.18.norm3.weight": "self_forcing_generator_bf16-00002-of-00006.pt",
301
+ "blocks.18.self_attn.k.bias": "self_forcing_generator_bf16-00002-of-00006.pt",
302
+ "blocks.18.self_attn.k.weight": "self_forcing_generator_bf16-00002-of-00006.pt",
303
+ "blocks.18.self_attn.norm_k.weight": "self_forcing_generator_bf16-00002-of-00006.pt",
304
+ "blocks.18.self_attn.norm_q.weight": "self_forcing_generator_bf16-00002-of-00006.pt",
305
+ "blocks.18.self_attn.o.bias": "self_forcing_generator_bf16-00002-of-00006.pt",
306
+ "blocks.18.self_attn.o.weight": "self_forcing_generator_bf16-00002-of-00006.pt",
307
+ "blocks.18.self_attn.q.bias": "self_forcing_generator_bf16-00002-of-00006.pt",
308
+ "blocks.18.self_attn.q.weight": "self_forcing_generator_bf16-00002-of-00006.pt",
309
+ "blocks.18.self_attn.v.bias": "self_forcing_generator_bf16-00002-of-00006.pt",
310
+ "blocks.18.self_attn.v.weight": "self_forcing_generator_bf16-00002-of-00006.pt",
311
+ "blocks.19.cross_attn.k.bias": "self_forcing_generator_bf16-00002-of-00006.pt",
312
+ "blocks.19.cross_attn.k.weight": "self_forcing_generator_bf16-00002-of-00006.pt",
313
+ "blocks.19.cross_attn.norm_k.weight": "self_forcing_generator_bf16-00002-of-00006.pt",
314
+ "blocks.19.cross_attn.norm_q.weight": "self_forcing_generator_bf16-00002-of-00006.pt",
315
+ "blocks.19.cross_attn.o.bias": "self_forcing_generator_bf16-00002-of-00006.pt",
316
+ "blocks.19.cross_attn.o.weight": "self_forcing_generator_bf16-00002-of-00006.pt",
317
+ "blocks.19.cross_attn.q.bias": "self_forcing_generator_bf16-00002-of-00006.pt",
318
+ "blocks.19.cross_attn.q.weight": "self_forcing_generator_bf16-00002-of-00006.pt",
319
+ "blocks.19.cross_attn.v.bias": "self_forcing_generator_bf16-00002-of-00006.pt",
320
+ "blocks.19.cross_attn.v.weight": "self_forcing_generator_bf16-00002-of-00006.pt",
321
+ "blocks.19.ffn.0.bias": "self_forcing_generator_bf16-00002-of-00006.pt",
322
+ "blocks.19.ffn.0.weight": "self_forcing_generator_bf16-00002-of-00006.pt",
323
+ "blocks.19.ffn.2.bias": "self_forcing_generator_bf16-00002-of-00006.pt",
324
+ "blocks.19.ffn.2.weight": "self_forcing_generator_bf16-00002-of-00006.pt",
325
+ "blocks.19.modulation": "self_forcing_generator_bf16-00002-of-00006.pt",
326
+ "blocks.19.norm3.bias": "self_forcing_generator_bf16-00002-of-00006.pt",
327
+ "blocks.19.norm3.weight": "self_forcing_generator_bf16-00002-of-00006.pt",
328
+ "blocks.19.self_attn.k.bias": "self_forcing_generator_bf16-00002-of-00006.pt",
329
+ "blocks.19.self_attn.k.weight": "self_forcing_generator_bf16-00002-of-00006.pt",
330
+ "blocks.19.self_attn.norm_k.weight": "self_forcing_generator_bf16-00002-of-00006.pt",
331
+ "blocks.19.self_attn.norm_q.weight": "self_forcing_generator_bf16-00002-of-00006.pt",
332
+ "blocks.19.self_attn.o.bias": "self_forcing_generator_bf16-00002-of-00006.pt",
333
+ "blocks.19.self_attn.o.weight": "self_forcing_generator_bf16-00002-of-00006.pt",
334
+ "blocks.19.self_attn.q.bias": "self_forcing_generator_bf16-00002-of-00006.pt",
335
+ "blocks.19.self_attn.q.weight": "self_forcing_generator_bf16-00002-of-00006.pt",
336
+ "blocks.19.self_attn.v.bias": "self_forcing_generator_bf16-00002-of-00006.pt",
337
+ "blocks.19.self_attn.v.weight": "self_forcing_generator_bf16-00002-of-00006.pt",
338
+ "blocks.2.cross_attn.k.bias": "self_forcing_generator_bf16-00002-of-00006.pt",
339
+ "blocks.2.cross_attn.k.weight": "self_forcing_generator_bf16-00002-of-00006.pt",
340
+ "blocks.2.cross_attn.norm_k.weight": "self_forcing_generator_bf16-00002-of-00006.pt",
341
+ "blocks.2.cross_attn.norm_q.weight": "self_forcing_generator_bf16-00002-of-00006.pt",
342
+ "blocks.2.cross_attn.o.bias": "self_forcing_generator_bf16-00002-of-00006.pt",
343
+ "blocks.2.cross_attn.o.weight": "self_forcing_generator_bf16-00002-of-00006.pt",
344
+ "blocks.2.cross_attn.q.bias": "self_forcing_generator_bf16-00002-of-00006.pt",
345
+ "blocks.2.cross_attn.q.weight": "self_forcing_generator_bf16-00002-of-00006.pt",
346
+ "blocks.2.cross_attn.v.bias": "self_forcing_generator_bf16-00002-of-00006.pt",
347
+ "blocks.2.cross_attn.v.weight": "self_forcing_generator_bf16-00002-of-00006.pt",
348
+ "blocks.2.ffn.0.bias": "self_forcing_generator_bf16-00002-of-00006.pt",
349
+ "blocks.2.ffn.0.weight": "self_forcing_generator_bf16-00002-of-00006.pt",
350
+ "blocks.2.ffn.2.bias": "self_forcing_generator_bf16-00002-of-00006.pt",
351
+ "blocks.2.ffn.2.weight": "self_forcing_generator_bf16-00002-of-00006.pt",
352
+ "blocks.2.modulation": "self_forcing_generator_bf16-00002-of-00006.pt",
353
+ "blocks.2.norm3.bias": "self_forcing_generator_bf16-00002-of-00006.pt",
354
+ "blocks.2.norm3.weight": "self_forcing_generator_bf16-00002-of-00006.pt",
355
+ "blocks.2.self_attn.k.bias": "self_forcing_generator_bf16-00002-of-00006.pt",
356
+ "blocks.2.self_attn.k.weight": "self_forcing_generator_bf16-00002-of-00006.pt",
357
+ "blocks.2.self_attn.norm_k.weight": "self_forcing_generator_bf16-00002-of-00006.pt",
358
+ "blocks.2.self_attn.norm_q.weight": "self_forcing_generator_bf16-00002-of-00006.pt",
359
+ "blocks.2.self_attn.o.bias": "self_forcing_generator_bf16-00002-of-00006.pt",
360
+ "blocks.2.self_attn.o.weight": "self_forcing_generator_bf16-00002-of-00006.pt",
361
+ "blocks.2.self_attn.q.bias": "self_forcing_generator_bf16-00002-of-00006.pt",
362
+ "blocks.2.self_attn.q.weight": "self_forcing_generator_bf16-00002-of-00006.pt",
363
+ "blocks.2.self_attn.v.bias": "self_forcing_generator_bf16-00002-of-00006.pt",
364
+ "blocks.2.self_attn.v.weight": "self_forcing_generator_bf16-00002-of-00006.pt",
365
+ "blocks.20.cross_attn.k.bias": "self_forcing_generator_bf16-00002-of-00006.pt",
366
+ "blocks.20.cross_attn.k.weight": "self_forcing_generator_bf16-00002-of-00006.pt",
367
+ "blocks.20.cross_attn.norm_k.weight": "self_forcing_generator_bf16-00002-of-00006.pt",
368
+ "blocks.20.cross_attn.norm_q.weight": "self_forcing_generator_bf16-00002-of-00006.pt",
369
+ "blocks.20.cross_attn.o.bias": "self_forcing_generator_bf16-00002-of-00006.pt",
370
+ "blocks.20.cross_attn.o.weight": "self_forcing_generator_bf16-00002-of-00006.pt",
371
+ "blocks.20.cross_attn.q.bias": "self_forcing_generator_bf16-00002-of-00006.pt",
372
+ "blocks.20.cross_attn.q.weight": "self_forcing_generator_bf16-00002-of-00006.pt",
373
+ "blocks.20.cross_attn.v.bias": "self_forcing_generator_bf16-00002-of-00006.pt",
374
+ "blocks.20.cross_attn.v.weight": "self_forcing_generator_bf16-00002-of-00006.pt",
375
+ "blocks.20.ffn.0.bias": "self_forcing_generator_bf16-00002-of-00006.pt",
376
+ "blocks.20.ffn.0.weight": "self_forcing_generator_bf16-00002-of-00006.pt",
377
+ "blocks.20.ffn.2.bias": "self_forcing_generator_bf16-00002-of-00006.pt",
378
+ "blocks.20.ffn.2.weight": "self_forcing_generator_bf16-00002-of-00006.pt",
379
+ "blocks.20.modulation": "self_forcing_generator_bf16-00002-of-00006.pt",
380
+ "blocks.20.norm3.bias": "self_forcing_generator_bf16-00002-of-00006.pt",
381
+ "blocks.20.norm3.weight": "self_forcing_generator_bf16-00002-of-00006.pt",
382
+ "blocks.20.self_attn.k.bias": "self_forcing_generator_bf16-00002-of-00006.pt",
383
+ "blocks.20.self_attn.k.weight": "self_forcing_generator_bf16-00002-of-00006.pt",
384
+ "blocks.20.self_attn.norm_k.weight": "self_forcing_generator_bf16-00002-of-00006.pt",
385
+ "blocks.20.self_attn.norm_q.weight": "self_forcing_generator_bf16-00002-of-00006.pt",
386
+ "blocks.20.self_attn.o.bias": "self_forcing_generator_bf16-00002-of-00006.pt",
387
+ "blocks.20.self_attn.o.weight": "self_forcing_generator_bf16-00002-of-00006.pt",
388
+ "blocks.20.self_attn.q.bias": "self_forcing_generator_bf16-00002-of-00006.pt",
389
+ "blocks.20.self_attn.q.weight": "self_forcing_generator_bf16-00002-of-00006.pt",
390
+ "blocks.20.self_attn.v.bias": "self_forcing_generator_bf16-00002-of-00006.pt",
391
+ "blocks.20.self_attn.v.weight": "self_forcing_generator_bf16-00002-of-00006.pt",
392
+ "blocks.21.cross_attn.k.bias": "self_forcing_generator_bf16-00002-of-00006.pt",
393
+ "blocks.21.cross_attn.k.weight": "self_forcing_generator_bf16-00002-of-00006.pt",
394
+ "blocks.21.cross_attn.norm_k.weight": "self_forcing_generator_bf16-00002-of-00006.pt",
395
+ "blocks.21.cross_attn.norm_q.weight": "self_forcing_generator_bf16-00002-of-00006.pt",
396
+ "blocks.21.cross_attn.o.bias": "self_forcing_generator_bf16-00002-of-00006.pt",
397
+ "blocks.21.cross_attn.o.weight": "self_forcing_generator_bf16-00002-of-00006.pt",
398
+ "blocks.21.cross_attn.q.bias": "self_forcing_generator_bf16-00002-of-00006.pt",
399
+ "blocks.21.cross_attn.q.weight": "self_forcing_generator_bf16-00002-of-00006.pt",
400
+ "blocks.21.cross_attn.v.bias": "self_forcing_generator_bf16-00002-of-00006.pt",
401
+ "blocks.21.cross_attn.v.weight": "self_forcing_generator_bf16-00002-of-00006.pt",
402
+ "blocks.21.ffn.0.bias": "self_forcing_generator_bf16-00002-of-00006.pt",
403
+ "blocks.21.ffn.0.weight": "self_forcing_generator_bf16-00002-of-00006.pt",
404
+ "blocks.21.ffn.2.bias": "self_forcing_generator_bf16-00002-of-00006.pt",
405
+ "blocks.21.ffn.2.weight": "self_forcing_generator_bf16-00002-of-00006.pt",
406
+ "blocks.21.modulation": "self_forcing_generator_bf16-00002-of-00006.pt",
407
+ "blocks.21.norm3.bias": "self_forcing_generator_bf16-00002-of-00006.pt",
408
+ "blocks.21.norm3.weight": "self_forcing_generator_bf16-00002-of-00006.pt",
409
+ "blocks.21.self_attn.k.bias": "self_forcing_generator_bf16-00002-of-00006.pt",
410
+ "blocks.21.self_attn.k.weight": "self_forcing_generator_bf16-00002-of-00006.pt",
411
+ "blocks.21.self_attn.norm_k.weight": "self_forcing_generator_bf16-00002-of-00006.pt",
412
+ "blocks.21.self_attn.norm_q.weight": "self_forcing_generator_bf16-00002-of-00006.pt",
413
+ "blocks.21.self_attn.o.bias": "self_forcing_generator_bf16-00002-of-00006.pt",
414
+ "blocks.21.self_attn.o.weight": "self_forcing_generator_bf16-00002-of-00006.pt",
415
+ "blocks.21.self_attn.q.bias": "self_forcing_generator_bf16-00002-of-00006.pt",
416
+ "blocks.21.self_attn.q.weight": "self_forcing_generator_bf16-00002-of-00006.pt",
417
+ "blocks.21.self_attn.v.bias": "self_forcing_generator_bf16-00002-of-00006.pt",
418
+ "blocks.21.self_attn.v.weight": "self_forcing_generator_bf16-00002-of-00006.pt",
419
+ "blocks.22.cross_attn.k.bias": "self_forcing_generator_bf16-00003-of-00006.pt",
420
+ "blocks.22.cross_attn.k.weight": "self_forcing_generator_bf16-00003-of-00006.pt",
421
+ "blocks.22.cross_attn.norm_k.weight": "self_forcing_generator_bf16-00003-of-00006.pt",
422
+ "blocks.22.cross_attn.norm_q.weight": "self_forcing_generator_bf16-00003-of-00006.pt",
423
+ "blocks.22.cross_attn.o.bias": "self_forcing_generator_bf16-00003-of-00006.pt",
424
+ "blocks.22.cross_attn.o.weight": "self_forcing_generator_bf16-00003-of-00006.pt",
425
+ "blocks.22.cross_attn.q.bias": "self_forcing_generator_bf16-00003-of-00006.pt",
426
+ "blocks.22.cross_attn.q.weight": "self_forcing_generator_bf16-00003-of-00006.pt",
427
+ "blocks.22.cross_attn.v.bias": "self_forcing_generator_bf16-00003-of-00006.pt",
428
+ "blocks.22.cross_attn.v.weight": "self_forcing_generator_bf16-00003-of-00006.pt",
429
+ "blocks.22.ffn.0.bias": "self_forcing_generator_bf16-00003-of-00006.pt",
430
+ "blocks.22.ffn.0.weight": "self_forcing_generator_bf16-00003-of-00006.pt",
431
+ "blocks.22.ffn.2.bias": "self_forcing_generator_bf16-00003-of-00006.pt",
432
+ "blocks.22.ffn.2.weight": "self_forcing_generator_bf16-00003-of-00006.pt",
433
+ "blocks.22.modulation": "self_forcing_generator_bf16-00003-of-00006.pt",
434
+ "blocks.22.norm3.bias": "self_forcing_generator_bf16-00003-of-00006.pt",
435
+ "blocks.22.norm3.weight": "self_forcing_generator_bf16-00003-of-00006.pt",
436
+ "blocks.22.self_attn.k.bias": "self_forcing_generator_bf16-00002-of-00006.pt",
437
+ "blocks.22.self_attn.k.weight": "self_forcing_generator_bf16-00002-of-00006.pt",
438
+ "blocks.22.self_attn.norm_k.weight": "self_forcing_generator_bf16-00002-of-00006.pt",
439
+ "blocks.22.self_attn.norm_q.weight": "self_forcing_generator_bf16-00002-of-00006.pt",
440
+ "blocks.22.self_attn.o.bias": "self_forcing_generator_bf16-00002-of-00006.pt",
441
+ "blocks.22.self_attn.o.weight": "self_forcing_generator_bf16-00002-of-00006.pt",
442
+ "blocks.22.self_attn.q.bias": "self_forcing_generator_bf16-00002-of-00006.pt",
443
+ "blocks.22.self_attn.q.weight": "self_forcing_generator_bf16-00002-of-00006.pt",
444
+ "blocks.22.self_attn.v.bias": "self_forcing_generator_bf16-00002-of-00006.pt",
445
+ "blocks.22.self_attn.v.weight": "self_forcing_generator_bf16-00003-of-00006.pt",
446
+ "blocks.23.cross_attn.k.bias": "self_forcing_generator_bf16-00003-of-00006.pt",
447
+ "blocks.23.cross_attn.k.weight": "self_forcing_generator_bf16-00003-of-00006.pt",
448
+ "blocks.23.cross_attn.norm_k.weight": "self_forcing_generator_bf16-00003-of-00006.pt",
449
+ "blocks.23.cross_attn.norm_q.weight": "self_forcing_generator_bf16-00003-of-00006.pt",
450
+ "blocks.23.cross_attn.o.bias": "self_forcing_generator_bf16-00003-of-00006.pt",
451
+ "blocks.23.cross_attn.o.weight": "self_forcing_generator_bf16-00003-of-00006.pt",
452
+ "blocks.23.cross_attn.q.bias": "self_forcing_generator_bf16-00003-of-00006.pt",
453
+ "blocks.23.cross_attn.q.weight": "self_forcing_generator_bf16-00003-of-00006.pt",
454
+ "blocks.23.cross_attn.v.bias": "self_forcing_generator_bf16-00003-of-00006.pt",
455
+ "blocks.23.cross_attn.v.weight": "self_forcing_generator_bf16-00003-of-00006.pt",
456
+ "blocks.23.ffn.0.bias": "self_forcing_generator_bf16-00003-of-00006.pt",
457
+ "blocks.23.ffn.0.weight": "self_forcing_generator_bf16-00003-of-00006.pt",
458
+ "blocks.23.ffn.2.bias": "self_forcing_generator_bf16-00003-of-00006.pt",
459
+ "blocks.23.ffn.2.weight": "self_forcing_generator_bf16-00003-of-00006.pt",
460
+ "blocks.23.modulation": "self_forcing_generator_bf16-00003-of-00006.pt",
461
+ "blocks.23.norm3.bias": "self_forcing_generator_bf16-00003-of-00006.pt",
462
+ "blocks.23.norm3.weight": "self_forcing_generator_bf16-00003-of-00006.pt",
463
+ "blocks.23.self_attn.k.bias": "self_forcing_generator_bf16-00003-of-00006.pt",
464
+ "blocks.23.self_attn.k.weight": "self_forcing_generator_bf16-00003-of-00006.pt",
465
+ "blocks.23.self_attn.norm_k.weight": "self_forcing_generator_bf16-00003-of-00006.pt",
466
+ "blocks.23.self_attn.norm_q.weight": "self_forcing_generator_bf16-00003-of-00006.pt",
467
+ "blocks.23.self_attn.o.bias": "self_forcing_generator_bf16-00003-of-00006.pt",
468
+ "blocks.23.self_attn.o.weight": "self_forcing_generator_bf16-00003-of-00006.pt",
469
+ "blocks.23.self_attn.q.bias": "self_forcing_generator_bf16-00003-of-00006.pt",
470
+ "blocks.23.self_attn.q.weight": "self_forcing_generator_bf16-00003-of-00006.pt",
471
+ "blocks.23.self_attn.v.bias": "self_forcing_generator_bf16-00003-of-00006.pt",
472
+ "blocks.23.self_attn.v.weight": "self_forcing_generator_bf16-00003-of-00006.pt",
473
+ "blocks.24.cross_attn.k.bias": "self_forcing_generator_bf16-00003-of-00006.pt",
474
+ "blocks.24.cross_attn.k.weight": "self_forcing_generator_bf16-00003-of-00006.pt",
475
+ "blocks.24.cross_attn.norm_k.weight": "self_forcing_generator_bf16-00003-of-00006.pt",
476
+ "blocks.24.cross_attn.norm_q.weight": "self_forcing_generator_bf16-00003-of-00006.pt",
477
+ "blocks.24.cross_attn.o.bias": "self_forcing_generator_bf16-00003-of-00006.pt",
478
+ "blocks.24.cross_attn.o.weight": "self_forcing_generator_bf16-00003-of-00006.pt",
479
+ "blocks.24.cross_attn.q.bias": "self_forcing_generator_bf16-00003-of-00006.pt",
480
+ "blocks.24.cross_attn.q.weight": "self_forcing_generator_bf16-00003-of-00006.pt",
481
+ "blocks.24.cross_attn.v.bias": "self_forcing_generator_bf16-00003-of-00006.pt",
482
+ "blocks.24.cross_attn.v.weight": "self_forcing_generator_bf16-00003-of-00006.pt",
483
+ "blocks.24.ffn.0.bias": "self_forcing_generator_bf16-00003-of-00006.pt",
484
+ "blocks.24.ffn.0.weight": "self_forcing_generator_bf16-00003-of-00006.pt",
485
+ "blocks.24.ffn.2.bias": "self_forcing_generator_bf16-00003-of-00006.pt",
486
+ "blocks.24.ffn.2.weight": "self_forcing_generator_bf16-00003-of-00006.pt",
487
+ "blocks.24.modulation": "self_forcing_generator_bf16-00003-of-00006.pt",
488
+ "blocks.24.norm3.bias": "self_forcing_generator_bf16-00003-of-00006.pt",
489
+ "blocks.24.norm3.weight": "self_forcing_generator_bf16-00003-of-00006.pt",
490
+ "blocks.24.self_attn.k.bias": "self_forcing_generator_bf16-00003-of-00006.pt",
491
+ "blocks.24.self_attn.k.weight": "self_forcing_generator_bf16-00003-of-00006.pt",
492
+ "blocks.24.self_attn.norm_k.weight": "self_forcing_generator_bf16-00003-of-00006.pt",
493
+ "blocks.24.self_attn.norm_q.weight": "self_forcing_generator_bf16-00003-of-00006.pt",
494
+ "blocks.24.self_attn.o.bias": "self_forcing_generator_bf16-00003-of-00006.pt",
495
+ "blocks.24.self_attn.o.weight": "self_forcing_generator_bf16-00003-of-00006.pt",
496
+ "blocks.24.self_attn.q.bias": "self_forcing_generator_bf16-00003-of-00006.pt",
497
+ "blocks.24.self_attn.q.weight": "self_forcing_generator_bf16-00003-of-00006.pt",
498
+ "blocks.24.self_attn.v.bias": "self_forcing_generator_bf16-00003-of-00006.pt",
499
+ "blocks.24.self_attn.v.weight": "self_forcing_generator_bf16-00003-of-00006.pt",
500
+ "blocks.25.cross_attn.k.bias": "self_forcing_generator_bf16-00003-of-00006.pt",
501
+ "blocks.25.cross_attn.k.weight": "self_forcing_generator_bf16-00003-of-00006.pt",
502
+ "blocks.25.cross_attn.norm_k.weight": "self_forcing_generator_bf16-00003-of-00006.pt",
503
+ "blocks.25.cross_attn.norm_q.weight": "self_forcing_generator_bf16-00003-of-00006.pt",
504
+ "blocks.25.cross_attn.o.bias": "self_forcing_generator_bf16-00003-of-00006.pt",
505
+ "blocks.25.cross_attn.o.weight": "self_forcing_generator_bf16-00003-of-00006.pt",
506
+ "blocks.25.cross_attn.q.bias": "self_forcing_generator_bf16-00003-of-00006.pt",
507
+ "blocks.25.cross_attn.q.weight": "self_forcing_generator_bf16-00003-of-00006.pt",
508
+ "blocks.25.cross_attn.v.bias": "self_forcing_generator_bf16-00003-of-00006.pt",
509
+ "blocks.25.cross_attn.v.weight": "self_forcing_generator_bf16-00003-of-00006.pt",
510
+ "blocks.25.ffn.0.bias": "self_forcing_generator_bf16-00003-of-00006.pt",
511
+ "blocks.25.ffn.0.weight": "self_forcing_generator_bf16-00003-of-00006.pt",
512
+ "blocks.25.ffn.2.bias": "self_forcing_generator_bf16-00003-of-00006.pt",
513
+ "blocks.25.ffn.2.weight": "self_forcing_generator_bf16-00003-of-00006.pt",
514
+ "blocks.25.modulation": "self_forcing_generator_bf16-00003-of-00006.pt",
515
+ "blocks.25.norm3.bias": "self_forcing_generator_bf16-00003-of-00006.pt",
516
+ "blocks.25.norm3.weight": "self_forcing_generator_bf16-00003-of-00006.pt",
517
+ "blocks.25.self_attn.k.bias": "self_forcing_generator_bf16-00003-of-00006.pt",
518
+ "blocks.25.self_attn.k.weight": "self_forcing_generator_bf16-00003-of-00006.pt",
519
+ "blocks.25.self_attn.norm_k.weight": "self_forcing_generator_bf16-00003-of-00006.pt",
520
+ "blocks.25.self_attn.norm_q.weight": "self_forcing_generator_bf16-00003-of-00006.pt",
521
+ "blocks.25.self_attn.o.bias": "self_forcing_generator_bf16-00003-of-00006.pt",
522
+ "blocks.25.self_attn.o.weight": "self_forcing_generator_bf16-00003-of-00006.pt",
523
+ "blocks.25.self_attn.q.bias": "self_forcing_generator_bf16-00003-of-00006.pt",
524
+ "blocks.25.self_attn.q.weight": "self_forcing_generator_bf16-00003-of-00006.pt",
525
+ "blocks.25.self_attn.v.bias": "self_forcing_generator_bf16-00003-of-00006.pt",
526
+ "blocks.25.self_attn.v.weight": "self_forcing_generator_bf16-00003-of-00006.pt",
527
+ "blocks.26.cross_attn.k.bias": "self_forcing_generator_bf16-00003-of-00006.pt",
528
+ "blocks.26.cross_attn.k.weight": "self_forcing_generator_bf16-00003-of-00006.pt",
529
+ "blocks.26.cross_attn.norm_k.weight": "self_forcing_generator_bf16-00003-of-00006.pt",
530
+ "blocks.26.cross_attn.norm_q.weight": "self_forcing_generator_bf16-00003-of-00006.pt",
531
+ "blocks.26.cross_attn.o.bias": "self_forcing_generator_bf16-00003-of-00006.pt",
532
+ "blocks.26.cross_attn.o.weight": "self_forcing_generator_bf16-00003-of-00006.pt",
533
+ "blocks.26.cross_attn.q.bias": "self_forcing_generator_bf16-00003-of-00006.pt",
534
+ "blocks.26.cross_attn.q.weight": "self_forcing_generator_bf16-00003-of-00006.pt",
535
+ "blocks.26.cross_attn.v.bias": "self_forcing_generator_bf16-00003-of-00006.pt",
536
+ "blocks.26.cross_attn.v.weight": "self_forcing_generator_bf16-00003-of-00006.pt",
537
+ "blocks.26.ffn.0.bias": "self_forcing_generator_bf16-00003-of-00006.pt",
538
+ "blocks.26.ffn.0.weight": "self_forcing_generator_bf16-00003-of-00006.pt",
539
+ "blocks.26.ffn.2.bias": "self_forcing_generator_bf16-00003-of-00006.pt",
540
+ "blocks.26.ffn.2.weight": "self_forcing_generator_bf16-00003-of-00006.pt",
541
+ "blocks.26.modulation": "self_forcing_generator_bf16-00003-of-00006.pt",
542
+ "blocks.26.norm3.bias": "self_forcing_generator_bf16-00003-of-00006.pt",
543
+ "blocks.26.norm3.weight": "self_forcing_generator_bf16-00003-of-00006.pt",
544
+ "blocks.26.self_attn.k.bias": "self_forcing_generator_bf16-00003-of-00006.pt",
545
+ "blocks.26.self_attn.k.weight": "self_forcing_generator_bf16-00003-of-00006.pt",
546
+ "blocks.26.self_attn.norm_k.weight": "self_forcing_generator_bf16-00003-of-00006.pt",
547
+ "blocks.26.self_attn.norm_q.weight": "self_forcing_generator_bf16-00003-of-00006.pt",
548
+ "blocks.26.self_attn.o.bias": "self_forcing_generator_bf16-00003-of-00006.pt",
549
+ "blocks.26.self_attn.o.weight": "self_forcing_generator_bf16-00003-of-00006.pt",
550
+ "blocks.26.self_attn.q.bias": "self_forcing_generator_bf16-00003-of-00006.pt",
551
+ "blocks.26.self_attn.q.weight": "self_forcing_generator_bf16-00003-of-00006.pt",
552
+ "blocks.26.self_attn.v.bias": "self_forcing_generator_bf16-00003-of-00006.pt",
553
+ "blocks.26.self_attn.v.weight": "self_forcing_generator_bf16-00003-of-00006.pt",
554
+ "blocks.27.cross_attn.k.bias": "self_forcing_generator_bf16-00003-of-00006.pt",
555
+ "blocks.27.cross_attn.k.weight": "self_forcing_generator_bf16-00003-of-00006.pt",
556
+ "blocks.27.cross_attn.norm_k.weight": "self_forcing_generator_bf16-00003-of-00006.pt",
557
+ "blocks.27.cross_attn.norm_q.weight": "self_forcing_generator_bf16-00003-of-00006.pt",
558
+ "blocks.27.cross_attn.o.bias": "self_forcing_generator_bf16-00003-of-00006.pt",
559
+ "blocks.27.cross_attn.o.weight": "self_forcing_generator_bf16-00003-of-00006.pt",
560
+ "blocks.27.cross_attn.q.bias": "self_forcing_generator_bf16-00003-of-00006.pt",
561
+ "blocks.27.cross_attn.q.weight": "self_forcing_generator_bf16-00003-of-00006.pt",
562
+ "blocks.27.cross_attn.v.bias": "self_forcing_generator_bf16-00003-of-00006.pt",
563
+ "blocks.27.cross_attn.v.weight": "self_forcing_generator_bf16-00003-of-00006.pt",
564
+ "blocks.27.ffn.0.bias": "self_forcing_generator_bf16-00003-of-00006.pt",
565
+ "blocks.27.ffn.0.weight": "self_forcing_generator_bf16-00003-of-00006.pt",
566
+ "blocks.27.ffn.2.bias": "self_forcing_generator_bf16-00003-of-00006.pt",
567
+ "blocks.27.ffn.2.weight": "self_forcing_generator_bf16-00003-of-00006.pt",
568
+ "blocks.27.modulation": "self_forcing_generator_bf16-00003-of-00006.pt",
569
+ "blocks.27.norm3.bias": "self_forcing_generator_bf16-00003-of-00006.pt",
570
+ "blocks.27.norm3.weight": "self_forcing_generator_bf16-00003-of-00006.pt",
571
+ "blocks.27.self_attn.k.bias": "self_forcing_generator_bf16-00003-of-00006.pt",
572
+ "blocks.27.self_attn.k.weight": "self_forcing_generator_bf16-00003-of-00006.pt",
573
+ "blocks.27.self_attn.norm_k.weight": "self_forcing_generator_bf16-00003-of-00006.pt",
574
+ "blocks.27.self_attn.norm_q.weight": "self_forcing_generator_bf16-00003-of-00006.pt",
575
+ "blocks.27.self_attn.o.bias": "self_forcing_generator_bf16-00003-of-00006.pt",
576
+ "blocks.27.self_attn.o.weight": "self_forcing_generator_bf16-00003-of-00006.pt",
577
+ "blocks.27.self_attn.q.bias": "self_forcing_generator_bf16-00003-of-00006.pt",
578
+ "blocks.27.self_attn.q.weight": "self_forcing_generator_bf16-00003-of-00006.pt",
579
+ "blocks.27.self_attn.v.bias": "self_forcing_generator_bf16-00003-of-00006.pt",
580
+ "blocks.27.self_attn.v.weight": "self_forcing_generator_bf16-00003-of-00006.pt",
581
+ "blocks.28.cross_attn.k.bias": "self_forcing_generator_bf16-00003-of-00006.pt",
582
+ "blocks.28.cross_attn.k.weight": "self_forcing_generator_bf16-00003-of-00006.pt",
583
+ "blocks.28.cross_attn.norm_k.weight": "self_forcing_generator_bf16-00003-of-00006.pt",
584
+ "blocks.28.cross_attn.norm_q.weight": "self_forcing_generator_bf16-00003-of-00006.pt",
585
+ "blocks.28.cross_attn.o.bias": "self_forcing_generator_bf16-00003-of-00006.pt",
586
+ "blocks.28.cross_attn.o.weight": "self_forcing_generator_bf16-00003-of-00006.pt",
587
+ "blocks.28.cross_attn.q.bias": "self_forcing_generator_bf16-00003-of-00006.pt",
588
+ "blocks.28.cross_attn.q.weight": "self_forcing_generator_bf16-00003-of-00006.pt",
589
+ "blocks.28.cross_attn.v.bias": "self_forcing_generator_bf16-00003-of-00006.pt",
590
+ "blocks.28.cross_attn.v.weight": "self_forcing_generator_bf16-00003-of-00006.pt",
591
+ "blocks.28.ffn.0.bias": "self_forcing_generator_bf16-00003-of-00006.pt",
592
+ "blocks.28.ffn.0.weight": "self_forcing_generator_bf16-00003-of-00006.pt",
593
+ "blocks.28.ffn.2.bias": "self_forcing_generator_bf16-00003-of-00006.pt",
594
+ "blocks.28.ffn.2.weight": "self_forcing_generator_bf16-00003-of-00006.pt",
595
+ "blocks.28.modulation": "self_forcing_generator_bf16-00003-of-00006.pt",
596
+ "blocks.28.norm3.bias": "self_forcing_generator_bf16-00003-of-00006.pt",
597
+ "blocks.28.norm3.weight": "self_forcing_generator_bf16-00003-of-00006.pt",
598
+ "blocks.28.self_attn.k.bias": "self_forcing_generator_bf16-00003-of-00006.pt",
599
+ "blocks.28.self_attn.k.weight": "self_forcing_generator_bf16-00003-of-00006.pt",
600
+ "blocks.28.self_attn.norm_k.weight": "self_forcing_generator_bf16-00003-of-00006.pt",
601
+ "blocks.28.self_attn.norm_q.weight": "self_forcing_generator_bf16-00003-of-00006.pt",
602
+ "blocks.28.self_attn.o.bias": "self_forcing_generator_bf16-00003-of-00006.pt",
603
+ "blocks.28.self_attn.o.weight": "self_forcing_generator_bf16-00003-of-00006.pt",
604
+ "blocks.28.self_attn.q.bias": "self_forcing_generator_bf16-00003-of-00006.pt",
605
+ "blocks.28.self_attn.q.weight": "self_forcing_generator_bf16-00003-of-00006.pt",
606
+ "blocks.28.self_attn.v.bias": "self_forcing_generator_bf16-00003-of-00006.pt",
607
+ "blocks.28.self_attn.v.weight": "self_forcing_generator_bf16-00003-of-00006.pt",
608
+ "blocks.29.cross_attn.k.bias": "self_forcing_generator_bf16-00003-of-00006.pt",
609
+ "blocks.29.cross_attn.k.weight": "self_forcing_generator_bf16-00003-of-00006.pt",
610
+ "blocks.29.cross_attn.norm_k.weight": "self_forcing_generator_bf16-00003-of-00006.pt",
611
+ "blocks.29.cross_attn.norm_q.weight": "self_forcing_generator_bf16-00003-of-00006.pt",
612
+ "blocks.29.cross_attn.o.bias": "self_forcing_generator_bf16-00003-of-00006.pt",
613
+ "blocks.29.cross_attn.o.weight": "self_forcing_generator_bf16-00003-of-00006.pt",
614
+ "blocks.29.cross_attn.q.bias": "self_forcing_generator_bf16-00003-of-00006.pt",
615
+ "blocks.29.cross_attn.q.weight": "self_forcing_generator_bf16-00003-of-00006.pt",
616
+ "blocks.29.cross_attn.v.bias": "self_forcing_generator_bf16-00003-of-00006.pt",
617
+ "blocks.29.cross_attn.v.weight": "self_forcing_generator_bf16-00003-of-00006.pt",
618
+ "blocks.29.ffn.0.bias": "self_forcing_generator_bf16-00003-of-00006.pt",
619
+ "blocks.29.ffn.0.weight": "self_forcing_generator_bf16-00003-of-00006.pt",
620
+ "blocks.29.ffn.2.bias": "self_forcing_generator_bf16-00003-of-00006.pt",
621
+ "blocks.29.ffn.2.weight": "self_forcing_generator_bf16-00004-of-00006.pt",
622
+ "blocks.29.modulation": "self_forcing_generator_bf16-00004-of-00006.pt",
623
+ "blocks.29.norm3.bias": "self_forcing_generator_bf16-00004-of-00006.pt",
624
+ "blocks.29.norm3.weight": "self_forcing_generator_bf16-00004-of-00006.pt",
625
+ "blocks.29.self_attn.k.bias": "self_forcing_generator_bf16-00003-of-00006.pt",
626
+ "blocks.29.self_attn.k.weight": "self_forcing_generator_bf16-00003-of-00006.pt",
627
+ "blocks.29.self_attn.norm_k.weight": "self_forcing_generator_bf16-00003-of-00006.pt",
628
+ "blocks.29.self_attn.norm_q.weight": "self_forcing_generator_bf16-00003-of-00006.pt",
629
+ "blocks.29.self_attn.o.bias": "self_forcing_generator_bf16-00003-of-00006.pt",
630
+ "blocks.29.self_attn.o.weight": "self_forcing_generator_bf16-00003-of-00006.pt",
631
+ "blocks.29.self_attn.q.bias": "self_forcing_generator_bf16-00003-of-00006.pt",
632
+ "blocks.29.self_attn.q.weight": "self_forcing_generator_bf16-00003-of-00006.pt",
633
+ "blocks.29.self_attn.v.bias": "self_forcing_generator_bf16-00003-of-00006.pt",
634
+ "blocks.29.self_attn.v.weight": "self_forcing_generator_bf16-00003-of-00006.pt",
635
+ "blocks.3.cross_attn.k.bias": "self_forcing_generator_bf16-00004-of-00006.pt",
636
+ "blocks.3.cross_attn.k.weight": "self_forcing_generator_bf16-00004-of-00006.pt",
637
+ "blocks.3.cross_attn.norm_k.weight": "self_forcing_generator_bf16-00004-of-00006.pt",
638
+ "blocks.3.cross_attn.norm_q.weight": "self_forcing_generator_bf16-00004-of-00006.pt",
639
+ "blocks.3.cross_attn.o.bias": "self_forcing_generator_bf16-00004-of-00006.pt",
640
+ "blocks.3.cross_attn.o.weight": "self_forcing_generator_bf16-00004-of-00006.pt",
641
+ "blocks.3.cross_attn.q.bias": "self_forcing_generator_bf16-00004-of-00006.pt",
642
+ "blocks.3.cross_attn.q.weight": "self_forcing_generator_bf16-00004-of-00006.pt",
643
+ "blocks.3.cross_attn.v.bias": "self_forcing_generator_bf16-00004-of-00006.pt",
644
+ "blocks.3.cross_attn.v.weight": "self_forcing_generator_bf16-00004-of-00006.pt",
645
+ "blocks.3.ffn.0.bias": "self_forcing_generator_bf16-00004-of-00006.pt",
646
+ "blocks.3.ffn.0.weight": "self_forcing_generator_bf16-00004-of-00006.pt",
647
+ "blocks.3.ffn.2.bias": "self_forcing_generator_bf16-00004-of-00006.pt",
648
+ "blocks.3.ffn.2.weight": "self_forcing_generator_bf16-00004-of-00006.pt",
649
+ "blocks.3.modulation": "self_forcing_generator_bf16-00004-of-00006.pt",
650
+ "blocks.3.norm3.bias": "self_forcing_generator_bf16-00004-of-00006.pt",
651
+ "blocks.3.norm3.weight": "self_forcing_generator_bf16-00004-of-00006.pt",
652
+ "blocks.3.self_attn.k.bias": "self_forcing_generator_bf16-00004-of-00006.pt",
653
+ "blocks.3.self_attn.k.weight": "self_forcing_generator_bf16-00004-of-00006.pt",
654
+ "blocks.3.self_attn.norm_k.weight": "self_forcing_generator_bf16-00004-of-00006.pt",
655
+ "blocks.3.self_attn.norm_q.weight": "self_forcing_generator_bf16-00004-of-00006.pt",
656
+ "blocks.3.self_attn.o.bias": "self_forcing_generator_bf16-00004-of-00006.pt",
657
+ "blocks.3.self_attn.o.weight": "self_forcing_generator_bf16-00004-of-00006.pt",
658
+ "blocks.3.self_attn.q.bias": "self_forcing_generator_bf16-00004-of-00006.pt",
659
+ "blocks.3.self_attn.q.weight": "self_forcing_generator_bf16-00004-of-00006.pt",
660
+ "blocks.3.self_attn.v.bias": "self_forcing_generator_bf16-00004-of-00006.pt",
661
+ "blocks.3.self_attn.v.weight": "self_forcing_generator_bf16-00004-of-00006.pt",
662
+ "blocks.30.cross_attn.k.bias": "self_forcing_generator_bf16-00004-of-00006.pt",
663
+ "blocks.30.cross_attn.k.weight": "self_forcing_generator_bf16-00004-of-00006.pt",
664
+ "blocks.30.cross_attn.norm_k.weight": "self_forcing_generator_bf16-00004-of-00006.pt",
665
+ "blocks.30.cross_attn.norm_q.weight": "self_forcing_generator_bf16-00004-of-00006.pt",
666
+ "blocks.30.cross_attn.o.bias": "self_forcing_generator_bf16-00004-of-00006.pt",
667
+ "blocks.30.cross_attn.o.weight": "self_forcing_generator_bf16-00004-of-00006.pt",
668
+ "blocks.30.cross_attn.q.bias": "self_forcing_generator_bf16-00004-of-00006.pt",
669
+ "blocks.30.cross_attn.q.weight": "self_forcing_generator_bf16-00004-of-00006.pt",
670
+ "blocks.30.cross_attn.v.bias": "self_forcing_generator_bf16-00004-of-00006.pt",
671
+ "blocks.30.cross_attn.v.weight": "self_forcing_generator_bf16-00004-of-00006.pt",
672
+ "blocks.30.ffn.0.bias": "self_forcing_generator_bf16-00004-of-00006.pt",
673
+ "blocks.30.ffn.0.weight": "self_forcing_generator_bf16-00004-of-00006.pt",
674
+ "blocks.30.ffn.2.bias": "self_forcing_generator_bf16-00004-of-00006.pt",
675
+ "blocks.30.ffn.2.weight": "self_forcing_generator_bf16-00004-of-00006.pt",
676
+ "blocks.30.modulation": "self_forcing_generator_bf16-00004-of-00006.pt",
677
+ "blocks.30.norm3.bias": "self_forcing_generator_bf16-00004-of-00006.pt",
678
+ "blocks.30.norm3.weight": "self_forcing_generator_bf16-00004-of-00006.pt",
679
+ "blocks.30.self_attn.k.bias": "self_forcing_generator_bf16-00004-of-00006.pt",
680
+ "blocks.30.self_attn.k.weight": "self_forcing_generator_bf16-00004-of-00006.pt",
681
+ "blocks.30.self_attn.norm_k.weight": "self_forcing_generator_bf16-00004-of-00006.pt",
682
+ "blocks.30.self_attn.norm_q.weight": "self_forcing_generator_bf16-00004-of-00006.pt",
683
+ "blocks.30.self_attn.o.bias": "self_forcing_generator_bf16-00004-of-00006.pt",
684
+ "blocks.30.self_attn.o.weight": "self_forcing_generator_bf16-00004-of-00006.pt",
685
+ "blocks.30.self_attn.q.bias": "self_forcing_generator_bf16-00004-of-00006.pt",
686
+ "blocks.30.self_attn.q.weight": "self_forcing_generator_bf16-00004-of-00006.pt",
687
+ "blocks.30.self_attn.v.bias": "self_forcing_generator_bf16-00004-of-00006.pt",
688
+ "blocks.30.self_attn.v.weight": "self_forcing_generator_bf16-00004-of-00006.pt",
689
+ "blocks.31.cross_attn.k.bias": "self_forcing_generator_bf16-00004-of-00006.pt",
690
+ "blocks.31.cross_attn.k.weight": "self_forcing_generator_bf16-00004-of-00006.pt",
691
+ "blocks.31.cross_attn.norm_k.weight": "self_forcing_generator_bf16-00004-of-00006.pt",
692
+ "blocks.31.cross_attn.norm_q.weight": "self_forcing_generator_bf16-00004-of-00006.pt",
693
+ "blocks.31.cross_attn.o.bias": "self_forcing_generator_bf16-00004-of-00006.pt",
694
+ "blocks.31.cross_attn.o.weight": "self_forcing_generator_bf16-00004-of-00006.pt",
695
+ "blocks.31.cross_attn.q.bias": "self_forcing_generator_bf16-00004-of-00006.pt",
696
+ "blocks.31.cross_attn.q.weight": "self_forcing_generator_bf16-00004-of-00006.pt",
697
+ "blocks.31.cross_attn.v.bias": "self_forcing_generator_bf16-00004-of-00006.pt",
698
+ "blocks.31.cross_attn.v.weight": "self_forcing_generator_bf16-00004-of-00006.pt",
699
+ "blocks.31.ffn.0.bias": "self_forcing_generator_bf16-00004-of-00006.pt",
700
+ "blocks.31.ffn.0.weight": "self_forcing_generator_bf16-00004-of-00006.pt",
701
+ "blocks.31.ffn.2.bias": "self_forcing_generator_bf16-00004-of-00006.pt",
702
+ "blocks.31.ffn.2.weight": "self_forcing_generator_bf16-00004-of-00006.pt",
703
+ "blocks.31.modulation": "self_forcing_generator_bf16-00004-of-00006.pt",
704
+ "blocks.31.norm3.bias": "self_forcing_generator_bf16-00004-of-00006.pt",
705
+ "blocks.31.norm3.weight": "self_forcing_generator_bf16-00004-of-00006.pt",
706
+ "blocks.31.self_attn.k.bias": "self_forcing_generator_bf16-00004-of-00006.pt",
707
+ "blocks.31.self_attn.k.weight": "self_forcing_generator_bf16-00004-of-00006.pt",
708
+ "blocks.31.self_attn.norm_k.weight": "self_forcing_generator_bf16-00004-of-00006.pt",
709
+ "blocks.31.self_attn.norm_q.weight": "self_forcing_generator_bf16-00004-of-00006.pt",
710
+ "blocks.31.self_attn.o.bias": "self_forcing_generator_bf16-00004-of-00006.pt",
711
+ "blocks.31.self_attn.o.weight": "self_forcing_generator_bf16-00004-of-00006.pt",
712
+ "blocks.31.self_attn.q.bias": "self_forcing_generator_bf16-00004-of-00006.pt",
713
+ "blocks.31.self_attn.q.weight": "self_forcing_generator_bf16-00004-of-00006.pt",
714
+ "blocks.31.self_attn.v.bias": "self_forcing_generator_bf16-00004-of-00006.pt",
715
+ "blocks.31.self_attn.v.weight": "self_forcing_generator_bf16-00004-of-00006.pt",
716
+ "blocks.32.cross_attn.k.bias": "self_forcing_generator_bf16-00004-of-00006.pt",
717
+ "blocks.32.cross_attn.k.weight": "self_forcing_generator_bf16-00004-of-00006.pt",
718
+ "blocks.32.cross_attn.norm_k.weight": "self_forcing_generator_bf16-00004-of-00006.pt",
719
+ "blocks.32.cross_attn.norm_q.weight": "self_forcing_generator_bf16-00004-of-00006.pt",
720
+ "blocks.32.cross_attn.o.bias": "self_forcing_generator_bf16-00004-of-00006.pt",
721
+ "blocks.32.cross_attn.o.weight": "self_forcing_generator_bf16-00004-of-00006.pt",
722
+ "blocks.32.cross_attn.q.bias": "self_forcing_generator_bf16-00004-of-00006.pt",
723
+ "blocks.32.cross_attn.q.weight": "self_forcing_generator_bf16-00004-of-00006.pt",
724
+ "blocks.32.cross_attn.v.bias": "self_forcing_generator_bf16-00004-of-00006.pt",
725
+ "blocks.32.cross_attn.v.weight": "self_forcing_generator_bf16-00004-of-00006.pt",
726
+ "blocks.32.ffn.0.bias": "self_forcing_generator_bf16-00004-of-00006.pt",
727
+ "blocks.32.ffn.0.weight": "self_forcing_generator_bf16-00004-of-00006.pt",
728
+ "blocks.32.ffn.2.bias": "self_forcing_generator_bf16-00004-of-00006.pt",
729
+ "blocks.32.ffn.2.weight": "self_forcing_generator_bf16-00004-of-00006.pt",
730
+ "blocks.32.modulation": "self_forcing_generator_bf16-00004-of-00006.pt",
731
+ "blocks.32.norm3.bias": "self_forcing_generator_bf16-00004-of-00006.pt",
732
+ "blocks.32.norm3.weight": "self_forcing_generator_bf16-00004-of-00006.pt",
733
+ "blocks.32.self_attn.k.bias": "self_forcing_generator_bf16-00004-of-00006.pt",
734
+ "blocks.32.self_attn.k.weight": "self_forcing_generator_bf16-00004-of-00006.pt",
735
+ "blocks.32.self_attn.norm_k.weight": "self_forcing_generator_bf16-00004-of-00006.pt",
736
+ "blocks.32.self_attn.norm_q.weight": "self_forcing_generator_bf16-00004-of-00006.pt",
737
+ "blocks.32.self_attn.o.bias": "self_forcing_generator_bf16-00004-of-00006.pt",
738
+ "blocks.32.self_attn.o.weight": "self_forcing_generator_bf16-00004-of-00006.pt",
739
+ "blocks.32.self_attn.q.bias": "self_forcing_generator_bf16-00004-of-00006.pt",
740
+ "blocks.32.self_attn.q.weight": "self_forcing_generator_bf16-00004-of-00006.pt",
741
+ "blocks.32.self_attn.v.bias": "self_forcing_generator_bf16-00004-of-00006.pt",
742
+ "blocks.32.self_attn.v.weight": "self_forcing_generator_bf16-00004-of-00006.pt",
743
+ "blocks.33.cross_attn.k.bias": "self_forcing_generator_bf16-00004-of-00006.pt",
744
+ "blocks.33.cross_attn.k.weight": "self_forcing_generator_bf16-00004-of-00006.pt",
745
+ "blocks.33.cross_attn.norm_k.weight": "self_forcing_generator_bf16-00004-of-00006.pt",
746
+ "blocks.33.cross_attn.norm_q.weight": "self_forcing_generator_bf16-00004-of-00006.pt",
747
+ "blocks.33.cross_attn.o.bias": "self_forcing_generator_bf16-00004-of-00006.pt",
748
+ "blocks.33.cross_attn.o.weight": "self_forcing_generator_bf16-00004-of-00006.pt",
749
+ "blocks.33.cross_attn.q.bias": "self_forcing_generator_bf16-00004-of-00006.pt",
750
+ "blocks.33.cross_attn.q.weight": "self_forcing_generator_bf16-00004-of-00006.pt",
751
+ "blocks.33.cross_attn.v.bias": "self_forcing_generator_bf16-00004-of-00006.pt",
752
+ "blocks.33.cross_attn.v.weight": "self_forcing_generator_bf16-00004-of-00006.pt",
753
+ "blocks.33.ffn.0.bias": "self_forcing_generator_bf16-00004-of-00006.pt",
754
+ "blocks.33.ffn.0.weight": "self_forcing_generator_bf16-00004-of-00006.pt",
755
+ "blocks.33.ffn.2.bias": "self_forcing_generator_bf16-00004-of-00006.pt",
756
+ "blocks.33.ffn.2.weight": "self_forcing_generator_bf16-00004-of-00006.pt",
757
+ "blocks.33.modulation": "self_forcing_generator_bf16-00004-of-00006.pt",
758
+ "blocks.33.norm3.bias": "self_forcing_generator_bf16-00004-of-00006.pt",
759
+ "blocks.33.norm3.weight": "self_forcing_generator_bf16-00004-of-00006.pt",
760
+ "blocks.33.self_attn.k.bias": "self_forcing_generator_bf16-00004-of-00006.pt",
761
+ "blocks.33.self_attn.k.weight": "self_forcing_generator_bf16-00004-of-00006.pt",
762
+ "blocks.33.self_attn.norm_k.weight": "self_forcing_generator_bf16-00004-of-00006.pt",
763
+ "blocks.33.self_attn.norm_q.weight": "self_forcing_generator_bf16-00004-of-00006.pt",
764
+ "blocks.33.self_attn.o.bias": "self_forcing_generator_bf16-00004-of-00006.pt",
765
+ "blocks.33.self_attn.o.weight": "self_forcing_generator_bf16-00004-of-00006.pt",
766
+ "blocks.33.self_attn.q.bias": "self_forcing_generator_bf16-00004-of-00006.pt",
767
+ "blocks.33.self_attn.q.weight": "self_forcing_generator_bf16-00004-of-00006.pt",
768
+ "blocks.33.self_attn.v.bias": "self_forcing_generator_bf16-00004-of-00006.pt",
769
+ "blocks.33.self_attn.v.weight": "self_forcing_generator_bf16-00004-of-00006.pt",
770
+ "blocks.34.cross_attn.k.bias": "self_forcing_generator_bf16-00004-of-00006.pt",
771
+ "blocks.34.cross_attn.k.weight": "self_forcing_generator_bf16-00004-of-00006.pt",
772
+ "blocks.34.cross_attn.norm_k.weight": "self_forcing_generator_bf16-00004-of-00006.pt",
773
+ "blocks.34.cross_attn.norm_q.weight": "self_forcing_generator_bf16-00004-of-00006.pt",
774
+ "blocks.34.cross_attn.o.bias": "self_forcing_generator_bf16-00004-of-00006.pt",
775
+ "blocks.34.cross_attn.o.weight": "self_forcing_generator_bf16-00004-of-00006.pt",
776
+ "blocks.34.cross_attn.q.bias": "self_forcing_generator_bf16-00004-of-00006.pt",
777
+ "blocks.34.cross_attn.q.weight": "self_forcing_generator_bf16-00004-of-00006.pt",
778
+ "blocks.34.cross_attn.v.bias": "self_forcing_generator_bf16-00004-of-00006.pt",
779
+ "blocks.34.cross_attn.v.weight": "self_forcing_generator_bf16-00004-of-00006.pt",
780
+ "blocks.34.ffn.0.bias": "self_forcing_generator_bf16-00004-of-00006.pt",
781
+ "blocks.34.ffn.0.weight": "self_forcing_generator_bf16-00004-of-00006.pt",
782
+ "blocks.34.ffn.2.bias": "self_forcing_generator_bf16-00004-of-00006.pt",
783
+ "blocks.34.ffn.2.weight": "self_forcing_generator_bf16-00004-of-00006.pt",
784
+ "blocks.34.modulation": "self_forcing_generator_bf16-00004-of-00006.pt",
785
+ "blocks.34.norm3.bias": "self_forcing_generator_bf16-00004-of-00006.pt",
786
+ "blocks.34.norm3.weight": "self_forcing_generator_bf16-00004-of-00006.pt",
787
+ "blocks.34.self_attn.k.bias": "self_forcing_generator_bf16-00004-of-00006.pt",
788
+ "blocks.34.self_attn.k.weight": "self_forcing_generator_bf16-00004-of-00006.pt",
789
+ "blocks.34.self_attn.norm_k.weight": "self_forcing_generator_bf16-00004-of-00006.pt",
790
+ "blocks.34.self_attn.norm_q.weight": "self_forcing_generator_bf16-00004-of-00006.pt",
791
+ "blocks.34.self_attn.o.bias": "self_forcing_generator_bf16-00004-of-00006.pt",
792
+ "blocks.34.self_attn.o.weight": "self_forcing_generator_bf16-00004-of-00006.pt",
793
+ "blocks.34.self_attn.q.bias": "self_forcing_generator_bf16-00004-of-00006.pt",
794
+ "blocks.34.self_attn.q.weight": "self_forcing_generator_bf16-00004-of-00006.pt",
795
+ "blocks.34.self_attn.v.bias": "self_forcing_generator_bf16-00004-of-00006.pt",
796
+ "blocks.34.self_attn.v.weight": "self_forcing_generator_bf16-00004-of-00006.pt",
797
+ "blocks.35.cross_attn.k.bias": "self_forcing_generator_bf16-00004-of-00006.pt",
798
+ "blocks.35.cross_attn.k.weight": "self_forcing_generator_bf16-00004-of-00006.pt",
799
+ "blocks.35.cross_attn.norm_k.weight": "self_forcing_generator_bf16-00004-of-00006.pt",
800
+ "blocks.35.cross_attn.norm_q.weight": "self_forcing_generator_bf16-00004-of-00006.pt",
801
+ "blocks.35.cross_attn.o.bias": "self_forcing_generator_bf16-00004-of-00006.pt",
802
+ "blocks.35.cross_attn.o.weight": "self_forcing_generator_bf16-00004-of-00006.pt",
803
+ "blocks.35.cross_attn.q.bias": "self_forcing_generator_bf16-00004-of-00006.pt",
804
+ "blocks.35.cross_attn.q.weight": "self_forcing_generator_bf16-00004-of-00006.pt",
805
+ "blocks.35.cross_attn.v.bias": "self_forcing_generator_bf16-00004-of-00006.pt",
806
+ "blocks.35.cross_attn.v.weight": "self_forcing_generator_bf16-00004-of-00006.pt",
807
+ "blocks.35.ffn.0.bias": "self_forcing_generator_bf16-00004-of-00006.pt",
808
+ "blocks.35.ffn.0.weight": "self_forcing_generator_bf16-00004-of-00006.pt",
809
+ "blocks.35.ffn.2.bias": "self_forcing_generator_bf16-00004-of-00006.pt",
810
+ "blocks.35.ffn.2.weight": "self_forcing_generator_bf16-00004-of-00006.pt",
811
+ "blocks.35.modulation": "self_forcing_generator_bf16-00004-of-00006.pt",
812
+ "blocks.35.norm3.bias": "self_forcing_generator_bf16-00004-of-00006.pt",
813
+ "blocks.35.norm3.weight": "self_forcing_generator_bf16-00004-of-00006.pt",
814
+ "blocks.35.self_attn.k.bias": "self_forcing_generator_bf16-00004-of-00006.pt",
815
+ "blocks.35.self_attn.k.weight": "self_forcing_generator_bf16-00004-of-00006.pt",
816
+ "blocks.35.self_attn.norm_k.weight": "self_forcing_generator_bf16-00004-of-00006.pt",
817
+ "blocks.35.self_attn.norm_q.weight": "self_forcing_generator_bf16-00004-of-00006.pt",
818
+ "blocks.35.self_attn.o.bias": "self_forcing_generator_bf16-00004-of-00006.pt",
819
+ "blocks.35.self_attn.o.weight": "self_forcing_generator_bf16-00004-of-00006.pt",
820
+ "blocks.35.self_attn.q.bias": "self_forcing_generator_bf16-00004-of-00006.pt",
821
+ "blocks.35.self_attn.q.weight": "self_forcing_generator_bf16-00004-of-00006.pt",
822
+ "blocks.35.self_attn.v.bias": "self_forcing_generator_bf16-00004-of-00006.pt",
823
+ "blocks.35.self_attn.v.weight": "self_forcing_generator_bf16-00004-of-00006.pt",
824
+ "blocks.36.cross_attn.k.bias": "self_forcing_generator_bf16-00004-of-00006.pt",
825
+ "blocks.36.cross_attn.k.weight": "self_forcing_generator_bf16-00004-of-00006.pt",
826
+ "blocks.36.cross_attn.norm_k.weight": "self_forcing_generator_bf16-00004-of-00006.pt",
827
+ "blocks.36.cross_attn.norm_q.weight": "self_forcing_generator_bf16-00004-of-00006.pt",
828
+ "blocks.36.cross_attn.o.bias": "self_forcing_generator_bf16-00004-of-00006.pt",
829
+ "blocks.36.cross_attn.o.weight": "self_forcing_generator_bf16-00005-of-00006.pt",
830
+ "blocks.36.cross_attn.q.bias": "self_forcing_generator_bf16-00005-of-00006.pt",
831
+ "blocks.36.cross_attn.q.weight": "self_forcing_generator_bf16-00005-of-00006.pt",
832
+ "blocks.36.cross_attn.v.bias": "self_forcing_generator_bf16-00005-of-00006.pt",
833
+ "blocks.36.cross_attn.v.weight": "self_forcing_generator_bf16-00005-of-00006.pt",
834
+ "blocks.36.ffn.0.bias": "self_forcing_generator_bf16-00005-of-00006.pt",
835
+ "blocks.36.ffn.0.weight": "self_forcing_generator_bf16-00005-of-00006.pt",
836
+ "blocks.36.ffn.2.bias": "self_forcing_generator_bf16-00005-of-00006.pt",
837
+ "blocks.36.ffn.2.weight": "self_forcing_generator_bf16-00005-of-00006.pt",
838
+ "blocks.36.modulation": "self_forcing_generator_bf16-00005-of-00006.pt",
839
+ "blocks.36.norm3.bias": "self_forcing_generator_bf16-00005-of-00006.pt",
840
+ "blocks.36.norm3.weight": "self_forcing_generator_bf16-00005-of-00006.pt",
841
+ "blocks.36.self_attn.k.bias": "self_forcing_generator_bf16-00004-of-00006.pt",
842
+ "blocks.36.self_attn.k.weight": "self_forcing_generator_bf16-00004-of-00006.pt",
843
+ "blocks.36.self_attn.norm_k.weight": "self_forcing_generator_bf16-00004-of-00006.pt",
844
+ "blocks.36.self_attn.norm_q.weight": "self_forcing_generator_bf16-00004-of-00006.pt",
845
+ "blocks.36.self_attn.o.bias": "self_forcing_generator_bf16-00004-of-00006.pt",
846
+ "blocks.36.self_attn.o.weight": "self_forcing_generator_bf16-00004-of-00006.pt",
847
+ "blocks.36.self_attn.q.bias": "self_forcing_generator_bf16-00004-of-00006.pt",
848
+ "blocks.36.self_attn.q.weight": "self_forcing_generator_bf16-00004-of-00006.pt",
849
+ "blocks.36.self_attn.v.bias": "self_forcing_generator_bf16-00004-of-00006.pt",
850
+ "blocks.36.self_attn.v.weight": "self_forcing_generator_bf16-00004-of-00006.pt",
851
+ "blocks.37.cross_attn.k.bias": "self_forcing_generator_bf16-00005-of-00006.pt",
852
+ "blocks.37.cross_attn.k.weight": "self_forcing_generator_bf16-00005-of-00006.pt",
853
+ "blocks.37.cross_attn.norm_k.weight": "self_forcing_generator_bf16-00005-of-00006.pt",
854
+ "blocks.37.cross_attn.norm_q.weight": "self_forcing_generator_bf16-00005-of-00006.pt",
855
+ "blocks.37.cross_attn.o.bias": "self_forcing_generator_bf16-00005-of-00006.pt",
856
+ "blocks.37.cross_attn.o.weight": "self_forcing_generator_bf16-00005-of-00006.pt",
857
+ "blocks.37.cross_attn.q.bias": "self_forcing_generator_bf16-00005-of-00006.pt",
858
+ "blocks.37.cross_attn.q.weight": "self_forcing_generator_bf16-00005-of-00006.pt",
859
+ "blocks.37.cross_attn.v.bias": "self_forcing_generator_bf16-00005-of-00006.pt",
860
+ "blocks.37.cross_attn.v.weight": "self_forcing_generator_bf16-00005-of-00006.pt",
861
+ "blocks.37.ffn.0.bias": "self_forcing_generator_bf16-00005-of-00006.pt",
862
+ "blocks.37.ffn.0.weight": "self_forcing_generator_bf16-00005-of-00006.pt",
863
+ "blocks.37.ffn.2.bias": "self_forcing_generator_bf16-00005-of-00006.pt",
864
+ "blocks.37.ffn.2.weight": "self_forcing_generator_bf16-00005-of-00006.pt",
865
+ "blocks.37.modulation": "self_forcing_generator_bf16-00005-of-00006.pt",
866
+ "blocks.37.norm3.bias": "self_forcing_generator_bf16-00005-of-00006.pt",
867
+ "blocks.37.norm3.weight": "self_forcing_generator_bf16-00005-of-00006.pt",
868
+ "blocks.37.self_attn.k.bias": "self_forcing_generator_bf16-00005-of-00006.pt",
869
+ "blocks.37.self_attn.k.weight": "self_forcing_generator_bf16-00005-of-00006.pt",
870
+ "blocks.37.self_attn.norm_k.weight": "self_forcing_generator_bf16-00005-of-00006.pt",
871
+ "blocks.37.self_attn.norm_q.weight": "self_forcing_generator_bf16-00005-of-00006.pt",
872
+ "blocks.37.self_attn.o.bias": "self_forcing_generator_bf16-00005-of-00006.pt",
873
+ "blocks.37.self_attn.o.weight": "self_forcing_generator_bf16-00005-of-00006.pt",
874
+ "blocks.37.self_attn.q.bias": "self_forcing_generator_bf16-00005-of-00006.pt",
875
+ "blocks.37.self_attn.q.weight": "self_forcing_generator_bf16-00005-of-00006.pt",
876
+ "blocks.37.self_attn.v.bias": "self_forcing_generator_bf16-00005-of-00006.pt",
877
+ "blocks.37.self_attn.v.weight": "self_forcing_generator_bf16-00005-of-00006.pt",
878
+ "blocks.38.cross_attn.k.bias": "self_forcing_generator_bf16-00005-of-00006.pt",
879
+ "blocks.38.cross_attn.k.weight": "self_forcing_generator_bf16-00005-of-00006.pt",
880
+ "blocks.38.cross_attn.norm_k.weight": "self_forcing_generator_bf16-00005-of-00006.pt",
881
+ "blocks.38.cross_attn.norm_q.weight": "self_forcing_generator_bf16-00005-of-00006.pt",
882
+ "blocks.38.cross_attn.o.bias": "self_forcing_generator_bf16-00005-of-00006.pt",
883
+ "blocks.38.cross_attn.o.weight": "self_forcing_generator_bf16-00005-of-00006.pt",
884
+ "blocks.38.cross_attn.q.bias": "self_forcing_generator_bf16-00005-of-00006.pt",
885
+ "blocks.38.cross_attn.q.weight": "self_forcing_generator_bf16-00005-of-00006.pt",
886
+ "blocks.38.cross_attn.v.bias": "self_forcing_generator_bf16-00005-of-00006.pt",
887
+ "blocks.38.cross_attn.v.weight": "self_forcing_generator_bf16-00005-of-00006.pt",
888
+ "blocks.38.ffn.0.bias": "self_forcing_generator_bf16-00005-of-00006.pt",
889
+ "blocks.38.ffn.0.weight": "self_forcing_generator_bf16-00005-of-00006.pt",
890
+ "blocks.38.ffn.2.bias": "self_forcing_generator_bf16-00005-of-00006.pt",
891
+ "blocks.38.ffn.2.weight": "self_forcing_generator_bf16-00005-of-00006.pt",
892
+ "blocks.38.modulation": "self_forcing_generator_bf16-00005-of-00006.pt",
893
+ "blocks.38.norm3.bias": "self_forcing_generator_bf16-00005-of-00006.pt",
894
+ "blocks.38.norm3.weight": "self_forcing_generator_bf16-00005-of-00006.pt",
895
+ "blocks.38.self_attn.k.bias": "self_forcing_generator_bf16-00005-of-00006.pt",
896
+ "blocks.38.self_attn.k.weight": "self_forcing_generator_bf16-00005-of-00006.pt",
897
+ "blocks.38.self_attn.norm_k.weight": "self_forcing_generator_bf16-00005-of-00006.pt",
898
+ "blocks.38.self_attn.norm_q.weight": "self_forcing_generator_bf16-00005-of-00006.pt",
899
+ "blocks.38.self_attn.o.bias": "self_forcing_generator_bf16-00005-of-00006.pt",
900
+ "blocks.38.self_attn.o.weight": "self_forcing_generator_bf16-00005-of-00006.pt",
901
+ "blocks.38.self_attn.q.bias": "self_forcing_generator_bf16-00005-of-00006.pt",
902
+ "blocks.38.self_attn.q.weight": "self_forcing_generator_bf16-00005-of-00006.pt",
903
+ "blocks.38.self_attn.v.bias": "self_forcing_generator_bf16-00005-of-00006.pt",
904
+ "blocks.38.self_attn.v.weight": "self_forcing_generator_bf16-00005-of-00006.pt",
905
+ "blocks.39.cross_attn.k.bias": "self_forcing_generator_bf16-00005-of-00006.pt",
906
+ "blocks.39.cross_attn.k.weight": "self_forcing_generator_bf16-00005-of-00006.pt",
907
+ "blocks.39.cross_attn.norm_k.weight": "self_forcing_generator_bf16-00005-of-00006.pt",
908
+ "blocks.39.cross_attn.norm_q.weight": "self_forcing_generator_bf16-00005-of-00006.pt",
909
+ "blocks.39.cross_attn.o.bias": "self_forcing_generator_bf16-00005-of-00006.pt",
910
+ "blocks.39.cross_attn.o.weight": "self_forcing_generator_bf16-00005-of-00006.pt",
911
+ "blocks.39.cross_attn.q.bias": "self_forcing_generator_bf16-00005-of-00006.pt",
912
+ "blocks.39.cross_attn.q.weight": "self_forcing_generator_bf16-00005-of-00006.pt",
913
+ "blocks.39.cross_attn.v.bias": "self_forcing_generator_bf16-00005-of-00006.pt",
914
+ "blocks.39.cross_attn.v.weight": "self_forcing_generator_bf16-00005-of-00006.pt",
915
+ "blocks.39.ffn.0.bias": "self_forcing_generator_bf16-00005-of-00006.pt",
916
+ "blocks.39.ffn.0.weight": "self_forcing_generator_bf16-00005-of-00006.pt",
917
+ "blocks.39.ffn.2.bias": "self_forcing_generator_bf16-00005-of-00006.pt",
918
+ "blocks.39.ffn.2.weight": "self_forcing_generator_bf16-00005-of-00006.pt",
919
+ "blocks.39.modulation": "self_forcing_generator_bf16-00005-of-00006.pt",
920
+ "blocks.39.norm3.bias": "self_forcing_generator_bf16-00005-of-00006.pt",
921
+ "blocks.39.norm3.weight": "self_forcing_generator_bf16-00005-of-00006.pt",
922
+ "blocks.39.self_attn.k.bias": "self_forcing_generator_bf16-00005-of-00006.pt",
923
+ "blocks.39.self_attn.k.weight": "self_forcing_generator_bf16-00005-of-00006.pt",
924
+ "blocks.39.self_attn.norm_k.weight": "self_forcing_generator_bf16-00005-of-00006.pt",
925
+ "blocks.39.self_attn.norm_q.weight": "self_forcing_generator_bf16-00005-of-00006.pt",
926
+ "blocks.39.self_attn.o.bias": "self_forcing_generator_bf16-00005-of-00006.pt",
927
+ "blocks.39.self_attn.o.weight": "self_forcing_generator_bf16-00005-of-00006.pt",
928
+ "blocks.39.self_attn.q.bias": "self_forcing_generator_bf16-00005-of-00006.pt",
929
+ "blocks.39.self_attn.q.weight": "self_forcing_generator_bf16-00005-of-00006.pt",
930
+ "blocks.39.self_attn.v.bias": "self_forcing_generator_bf16-00005-of-00006.pt",
931
+ "blocks.39.self_attn.v.weight": "self_forcing_generator_bf16-00005-of-00006.pt",
932
+ "blocks.4.cross_attn.k.bias": "self_forcing_generator_bf16-00005-of-00006.pt",
933
+ "blocks.4.cross_attn.k.weight": "self_forcing_generator_bf16-00005-of-00006.pt",
934
+ "blocks.4.cross_attn.norm_k.weight": "self_forcing_generator_bf16-00005-of-00006.pt",
935
+ "blocks.4.cross_attn.norm_q.weight": "self_forcing_generator_bf16-00005-of-00006.pt",
936
+ "blocks.4.cross_attn.o.bias": "self_forcing_generator_bf16-00005-of-00006.pt",
937
+ "blocks.4.cross_attn.o.weight": "self_forcing_generator_bf16-00005-of-00006.pt",
938
+ "blocks.4.cross_attn.q.bias": "self_forcing_generator_bf16-00005-of-00006.pt",
939
+ "blocks.4.cross_attn.q.weight": "self_forcing_generator_bf16-00005-of-00006.pt",
940
+ "blocks.4.cross_attn.v.bias": "self_forcing_generator_bf16-00005-of-00006.pt",
941
+ "blocks.4.cross_attn.v.weight": "self_forcing_generator_bf16-00005-of-00006.pt",
942
+ "blocks.4.ffn.0.bias": "self_forcing_generator_bf16-00005-of-00006.pt",
943
+ "blocks.4.ffn.0.weight": "self_forcing_generator_bf16-00005-of-00006.pt",
944
+ "blocks.4.ffn.2.bias": "self_forcing_generator_bf16-00005-of-00006.pt",
945
+ "blocks.4.ffn.2.weight": "self_forcing_generator_bf16-00005-of-00006.pt",
946
+ "blocks.4.modulation": "self_forcing_generator_bf16-00005-of-00006.pt",
947
+ "blocks.4.norm3.bias": "self_forcing_generator_bf16-00005-of-00006.pt",
948
+ "blocks.4.norm3.weight": "self_forcing_generator_bf16-00005-of-00006.pt",
949
+ "blocks.4.self_attn.k.bias": "self_forcing_generator_bf16-00005-of-00006.pt",
950
+ "blocks.4.self_attn.k.weight": "self_forcing_generator_bf16-00005-of-00006.pt",
951
+ "blocks.4.self_attn.norm_k.weight": "self_forcing_generator_bf16-00005-of-00006.pt",
952
+ "blocks.4.self_attn.norm_q.weight": "self_forcing_generator_bf16-00005-of-00006.pt",
953
+ "blocks.4.self_attn.o.bias": "self_forcing_generator_bf16-00005-of-00006.pt",
954
+ "blocks.4.self_attn.o.weight": "self_forcing_generator_bf16-00005-of-00006.pt",
955
+ "blocks.4.self_attn.q.bias": "self_forcing_generator_bf16-00005-of-00006.pt",
956
+ "blocks.4.self_attn.q.weight": "self_forcing_generator_bf16-00005-of-00006.pt",
957
+ "blocks.4.self_attn.v.bias": "self_forcing_generator_bf16-00005-of-00006.pt",
958
+ "blocks.4.self_attn.v.weight": "self_forcing_generator_bf16-00005-of-00006.pt",
959
+ "blocks.5.cross_attn.k.bias": "self_forcing_generator_bf16-00005-of-00006.pt",
960
+ "blocks.5.cross_attn.k.weight": "self_forcing_generator_bf16-00005-of-00006.pt",
961
+ "blocks.5.cross_attn.norm_k.weight": "self_forcing_generator_bf16-00005-of-00006.pt",
962
+ "blocks.5.cross_attn.norm_q.weight": "self_forcing_generator_bf16-00005-of-00006.pt",
963
+ "blocks.5.cross_attn.o.bias": "self_forcing_generator_bf16-00005-of-00006.pt",
964
+ "blocks.5.cross_attn.o.weight": "self_forcing_generator_bf16-00005-of-00006.pt",
965
+ "blocks.5.cross_attn.q.bias": "self_forcing_generator_bf16-00005-of-00006.pt",
966
+ "blocks.5.cross_attn.q.weight": "self_forcing_generator_bf16-00005-of-00006.pt",
967
+ "blocks.5.cross_attn.v.bias": "self_forcing_generator_bf16-00005-of-00006.pt",
968
+ "blocks.5.cross_attn.v.weight": "self_forcing_generator_bf16-00005-of-00006.pt",
969
+ "blocks.5.ffn.0.bias": "self_forcing_generator_bf16-00005-of-00006.pt",
970
+ "blocks.5.ffn.0.weight": "self_forcing_generator_bf16-00005-of-00006.pt",
971
+ "blocks.5.ffn.2.bias": "self_forcing_generator_bf16-00005-of-00006.pt",
972
+ "blocks.5.ffn.2.weight": "self_forcing_generator_bf16-00005-of-00006.pt",
973
+ "blocks.5.modulation": "self_forcing_generator_bf16-00005-of-00006.pt",
974
+ "blocks.5.norm3.bias": "self_forcing_generator_bf16-00005-of-00006.pt",
975
+ "blocks.5.norm3.weight": "self_forcing_generator_bf16-00005-of-00006.pt",
976
+ "blocks.5.self_attn.k.bias": "self_forcing_generator_bf16-00005-of-00006.pt",
977
+ "blocks.5.self_attn.k.weight": "self_forcing_generator_bf16-00005-of-00006.pt",
978
+ "blocks.5.self_attn.norm_k.weight": "self_forcing_generator_bf16-00005-of-00006.pt",
979
+ "blocks.5.self_attn.norm_q.weight": "self_forcing_generator_bf16-00005-of-00006.pt",
980
+ "blocks.5.self_attn.o.bias": "self_forcing_generator_bf16-00005-of-00006.pt",
981
+ "blocks.5.self_attn.o.weight": "self_forcing_generator_bf16-00005-of-00006.pt",
982
+ "blocks.5.self_attn.q.bias": "self_forcing_generator_bf16-00005-of-00006.pt",
983
+ "blocks.5.self_attn.q.weight": "self_forcing_generator_bf16-00005-of-00006.pt",
984
+ "blocks.5.self_attn.v.bias": "self_forcing_generator_bf16-00005-of-00006.pt",
985
+ "blocks.5.self_attn.v.weight": "self_forcing_generator_bf16-00005-of-00006.pt",
986
+ "blocks.6.cross_attn.k.bias": "self_forcing_generator_bf16-00005-of-00006.pt",
987
+ "blocks.6.cross_attn.k.weight": "self_forcing_generator_bf16-00005-of-00006.pt",
988
+ "blocks.6.cross_attn.norm_k.weight": "self_forcing_generator_bf16-00005-of-00006.pt",
989
+ "blocks.6.cross_attn.norm_q.weight": "self_forcing_generator_bf16-00005-of-00006.pt",
990
+ "blocks.6.cross_attn.o.bias": "self_forcing_generator_bf16-00005-of-00006.pt",
991
+ "blocks.6.cross_attn.o.weight": "self_forcing_generator_bf16-00005-of-00006.pt",
992
+ "blocks.6.cross_attn.q.bias": "self_forcing_generator_bf16-00005-of-00006.pt",
993
+ "blocks.6.cross_attn.q.weight": "self_forcing_generator_bf16-00005-of-00006.pt",
994
+ "blocks.6.cross_attn.v.bias": "self_forcing_generator_bf16-00005-of-00006.pt",
995
+ "blocks.6.cross_attn.v.weight": "self_forcing_generator_bf16-00005-of-00006.pt",
996
+ "blocks.6.ffn.0.bias": "self_forcing_generator_bf16-00005-of-00006.pt",
997
+ "blocks.6.ffn.0.weight": "self_forcing_generator_bf16-00005-of-00006.pt",
998
+ "blocks.6.ffn.2.bias": "self_forcing_generator_bf16-00005-of-00006.pt",
999
+ "blocks.6.ffn.2.weight": "self_forcing_generator_bf16-00005-of-00006.pt",
1000
+ "blocks.6.modulation": "self_forcing_generator_bf16-00005-of-00006.pt",
1001
+ "blocks.6.norm3.bias": "self_forcing_generator_bf16-00005-of-00006.pt",
1002
+ "blocks.6.norm3.weight": "self_forcing_generator_bf16-00005-of-00006.pt",
1003
+ "blocks.6.self_attn.k.bias": "self_forcing_generator_bf16-00005-of-00006.pt",
1004
+ "blocks.6.self_attn.k.weight": "self_forcing_generator_bf16-00005-of-00006.pt",
1005
+ "blocks.6.self_attn.norm_k.weight": "self_forcing_generator_bf16-00005-of-00006.pt",
1006
+ "blocks.6.self_attn.norm_q.weight": "self_forcing_generator_bf16-00005-of-00006.pt",
1007
+ "blocks.6.self_attn.o.bias": "self_forcing_generator_bf16-00005-of-00006.pt",
1008
+ "blocks.6.self_attn.o.weight": "self_forcing_generator_bf16-00005-of-00006.pt",
1009
+ "blocks.6.self_attn.q.bias": "self_forcing_generator_bf16-00005-of-00006.pt",
1010
+ "blocks.6.self_attn.q.weight": "self_forcing_generator_bf16-00005-of-00006.pt",
1011
+ "blocks.6.self_attn.v.bias": "self_forcing_generator_bf16-00005-of-00006.pt",
1012
+ "blocks.6.self_attn.v.weight": "self_forcing_generator_bf16-00005-of-00006.pt",
1013
+ "blocks.7.cross_attn.k.bias": "self_forcing_generator_bf16-00005-of-00006.pt",
1014
+ "blocks.7.cross_attn.k.weight": "self_forcing_generator_bf16-00005-of-00006.pt",
1015
+ "blocks.7.cross_attn.norm_k.weight": "self_forcing_generator_bf16-00005-of-00006.pt",
1016
+ "blocks.7.cross_attn.norm_q.weight": "self_forcing_generator_bf16-00005-of-00006.pt",
1017
+ "blocks.7.cross_attn.o.bias": "self_forcing_generator_bf16-00005-of-00006.pt",
1018
+ "blocks.7.cross_attn.o.weight": "self_forcing_generator_bf16-00005-of-00006.pt",
1019
+ "blocks.7.cross_attn.q.bias": "self_forcing_generator_bf16-00005-of-00006.pt",
1020
+ "blocks.7.cross_attn.q.weight": "self_forcing_generator_bf16-00005-of-00006.pt",
1021
+ "blocks.7.cross_attn.v.bias": "self_forcing_generator_bf16-00005-of-00006.pt",
1022
+ "blocks.7.cross_attn.v.weight": "self_forcing_generator_bf16-00005-of-00006.pt",
1023
+ "blocks.7.ffn.0.bias": "self_forcing_generator_bf16-00005-of-00006.pt",
1024
+ "blocks.7.ffn.0.weight": "self_forcing_generator_bf16-00005-of-00006.pt",
1025
+ "blocks.7.ffn.2.bias": "self_forcing_generator_bf16-00005-of-00006.pt",
1026
+ "blocks.7.ffn.2.weight": "self_forcing_generator_bf16-00005-of-00006.pt",
1027
+ "blocks.7.modulation": "self_forcing_generator_bf16-00005-of-00006.pt",
1028
+ "blocks.7.norm3.bias": "self_forcing_generator_bf16-00005-of-00006.pt",
1029
+ "blocks.7.norm3.weight": "self_forcing_generator_bf16-00005-of-00006.pt",
1030
+ "blocks.7.self_attn.k.bias": "self_forcing_generator_bf16-00005-of-00006.pt",
1031
+ "blocks.7.self_attn.k.weight": "self_forcing_generator_bf16-00005-of-00006.pt",
1032
+ "blocks.7.self_attn.norm_k.weight": "self_forcing_generator_bf16-00005-of-00006.pt",
1033
+ "blocks.7.self_attn.norm_q.weight": "self_forcing_generator_bf16-00005-of-00006.pt",
1034
+ "blocks.7.self_attn.o.bias": "self_forcing_generator_bf16-00005-of-00006.pt",
1035
+ "blocks.7.self_attn.o.weight": "self_forcing_generator_bf16-00005-of-00006.pt",
1036
+ "blocks.7.self_attn.q.bias": "self_forcing_generator_bf16-00005-of-00006.pt",
1037
+ "blocks.7.self_attn.q.weight": "self_forcing_generator_bf16-00005-of-00006.pt",
1038
+ "blocks.7.self_attn.v.bias": "self_forcing_generator_bf16-00005-of-00006.pt",
1039
+ "blocks.7.self_attn.v.weight": "self_forcing_generator_bf16-00005-of-00006.pt",
1040
+ "blocks.8.cross_attn.k.bias": "self_forcing_generator_bf16-00006-of-00006.pt",
1041
+ "blocks.8.cross_attn.k.weight": "self_forcing_generator_bf16-00006-of-00006.pt",
1042
+ "blocks.8.cross_attn.norm_k.weight": "self_forcing_generator_bf16-00006-of-00006.pt",
1043
+ "blocks.8.cross_attn.norm_q.weight": "self_forcing_generator_bf16-00006-of-00006.pt",
1044
+ "blocks.8.cross_attn.o.bias": "self_forcing_generator_bf16-00006-of-00006.pt",
1045
+ "blocks.8.cross_attn.o.weight": "self_forcing_generator_bf16-00006-of-00006.pt",
1046
+ "blocks.8.cross_attn.q.bias": "self_forcing_generator_bf16-00006-of-00006.pt",
1047
+ "blocks.8.cross_attn.q.weight": "self_forcing_generator_bf16-00006-of-00006.pt",
1048
+ "blocks.8.cross_attn.v.bias": "self_forcing_generator_bf16-00006-of-00006.pt",
1049
+ "blocks.8.cross_attn.v.weight": "self_forcing_generator_bf16-00006-of-00006.pt",
1050
+ "blocks.8.ffn.0.bias": "self_forcing_generator_bf16-00006-of-00006.pt",
1051
+ "blocks.8.ffn.0.weight": "self_forcing_generator_bf16-00006-of-00006.pt",
1052
+ "blocks.8.ffn.2.bias": "self_forcing_generator_bf16-00006-of-00006.pt",
1053
+ "blocks.8.ffn.2.weight": "self_forcing_generator_bf16-00006-of-00006.pt",
1054
+ "blocks.8.modulation": "self_forcing_generator_bf16-00006-of-00006.pt",
1055
+ "blocks.8.norm3.bias": "self_forcing_generator_bf16-00006-of-00006.pt",
1056
+ "blocks.8.norm3.weight": "self_forcing_generator_bf16-00006-of-00006.pt",
1057
+ "blocks.8.self_attn.k.bias": "self_forcing_generator_bf16-00005-of-00006.pt",
1058
+ "blocks.8.self_attn.k.weight": "self_forcing_generator_bf16-00006-of-00006.pt",
1059
+ "blocks.8.self_attn.norm_k.weight": "self_forcing_generator_bf16-00005-of-00006.pt",
1060
+ "blocks.8.self_attn.norm_q.weight": "self_forcing_generator_bf16-00005-of-00006.pt",
1061
+ "blocks.8.self_attn.o.bias": "self_forcing_generator_bf16-00006-of-00006.pt",
1062
+ "blocks.8.self_attn.o.weight": "self_forcing_generator_bf16-00006-of-00006.pt",
1063
+ "blocks.8.self_attn.q.bias": "self_forcing_generator_bf16-00006-of-00006.pt",
1064
+ "blocks.8.self_attn.q.weight": "self_forcing_generator_bf16-00006-of-00006.pt",
1065
+ "blocks.8.self_attn.v.bias": "self_forcing_generator_bf16-00006-of-00006.pt",
1066
+ "blocks.8.self_attn.v.weight": "self_forcing_generator_bf16-00006-of-00006.pt",
1067
+ "blocks.9.cross_attn.k.bias": "self_forcing_generator_bf16-00006-of-00006.pt",
1068
+ "blocks.9.cross_attn.k.weight": "self_forcing_generator_bf16-00006-of-00006.pt",
1069
+ "blocks.9.cross_attn.norm_k.weight": "self_forcing_generator_bf16-00006-of-00006.pt",
1070
+ "blocks.9.cross_attn.norm_q.weight": "self_forcing_generator_bf16-00006-of-00006.pt",
1071
+ "blocks.9.cross_attn.o.bias": "self_forcing_generator_bf16-00006-of-00006.pt",
1072
+ "blocks.9.cross_attn.o.weight": "self_forcing_generator_bf16-00006-of-00006.pt",
1073
+ "blocks.9.cross_attn.q.bias": "self_forcing_generator_bf16-00006-of-00006.pt",
1074
+ "blocks.9.cross_attn.q.weight": "self_forcing_generator_bf16-00006-of-00006.pt",
1075
+ "blocks.9.cross_attn.v.bias": "self_forcing_generator_bf16-00006-of-00006.pt",
1076
+ "blocks.9.cross_attn.v.weight": "self_forcing_generator_bf16-00006-of-00006.pt",
1077
+ "blocks.9.ffn.0.bias": "self_forcing_generator_bf16-00006-of-00006.pt",
1078
+ "blocks.9.ffn.0.weight": "self_forcing_generator_bf16-00006-of-00006.pt",
1079
+ "blocks.9.ffn.2.bias": "self_forcing_generator_bf16-00006-of-00006.pt",
1080
+ "blocks.9.ffn.2.weight": "self_forcing_generator_bf16-00006-of-00006.pt",
1081
+ "blocks.9.modulation": "self_forcing_generator_bf16-00006-of-00006.pt",
1082
+ "blocks.9.norm3.bias": "self_forcing_generator_bf16-00006-of-00006.pt",
1083
+ "blocks.9.norm3.weight": "self_forcing_generator_bf16-00006-of-00006.pt",
1084
+ "blocks.9.self_attn.k.bias": "self_forcing_generator_bf16-00006-of-00006.pt",
1085
+ "blocks.9.self_attn.k.weight": "self_forcing_generator_bf16-00006-of-00006.pt",
1086
+ "blocks.9.self_attn.norm_k.weight": "self_forcing_generator_bf16-00006-of-00006.pt",
1087
+ "blocks.9.self_attn.norm_q.weight": "self_forcing_generator_bf16-00006-of-00006.pt",
1088
+ "blocks.9.self_attn.o.bias": "self_forcing_generator_bf16-00006-of-00006.pt",
1089
+ "blocks.9.self_attn.o.weight": "self_forcing_generator_bf16-00006-of-00006.pt",
1090
+ "blocks.9.self_attn.q.bias": "self_forcing_generator_bf16-00006-of-00006.pt",
1091
+ "blocks.9.self_attn.q.weight": "self_forcing_generator_bf16-00006-of-00006.pt",
1092
+ "blocks.9.self_attn.v.bias": "self_forcing_generator_bf16-00006-of-00006.pt",
1093
+ "blocks.9.self_attn.v.weight": "self_forcing_generator_bf16-00006-of-00006.pt",
1094
+ "head.head.bias": "self_forcing_generator_bf16-00006-of-00006.pt",
1095
+ "head.head.weight": "self_forcing_generator_bf16-00006-of-00006.pt",
1096
+ "head.modulation": "self_forcing_generator_bf16-00006-of-00006.pt",
1097
+ "patch_embedding.bias": "self_forcing_generator_bf16-00006-of-00006.pt",
1098
+ "patch_embedding.weight": "self_forcing_generator_bf16-00006-of-00006.pt",
1099
+ "text_embedding.0.bias": "self_forcing_generator_bf16-00006-of-00006.pt",
1100
+ "text_embedding.0.weight": "self_forcing_generator_bf16-00006-of-00006.pt",
1101
+ "text_embedding.2.bias": "self_forcing_generator_bf16-00006-of-00006.pt",
1102
+ "text_embedding.2.weight": "self_forcing_generator_bf16-00006-of-00006.pt",
1103
+ "time_embedding.0.bias": "self_forcing_generator_bf16-00006-of-00006.pt",
1104
+ "time_embedding.0.weight": "self_forcing_generator_bf16-00006-of-00006.pt",
1105
+ "time_embedding.2.bias": "self_forcing_generator_bf16-00006-of-00006.pt",
1106
+ "time_embedding.2.weight": "self_forcing_generator_bf16-00006-of-00006.pt",
1107
+ "time_projection.1.bias": "self_forcing_generator_bf16-00006-of-00006.pt",
1108
+ "time_projection.1.weight": "self_forcing_generator_bf16-00006-of-00006.pt"
1109
+ }
1110
+ }