Neko-Institute-of-Science committed on
Commit
e2fc60b
·
1 Parent(s): 6c3e3e3

Upload OPT 175B NumPy weights.

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .gitattributes +578 -0
  2. weights/decoder.embed_positions.weight +3 -0
  3. weights/decoder.embed_tokens.weight +3 -0
  4. weights/decoder.layer_norm.bias +0 -0
  5. weights/decoder.layer_norm.weight +0 -0
  6. weights/decoder.layers.0.fc1.bias +0 -0
  7. weights/decoder.layers.0.fc1.weight +3 -0
  8. weights/decoder.layers.0.fc2.bias +0 -0
  9. weights/decoder.layers.0.fc2.weight +3 -0
  10. weights/decoder.layers.0.final_layer_norm.bias +0 -0
  11. weights/decoder.layers.0.final_layer_norm.weight +0 -0
  12. weights/decoder.layers.0.self_attn.k_proj.bias +0 -0
  13. weights/decoder.layers.0.self_attn.k_proj.weight +3 -0
  14. weights/decoder.layers.0.self_attn.out_proj.bias +0 -0
  15. weights/decoder.layers.0.self_attn.out_proj.weight +3 -0
  16. weights/decoder.layers.0.self_attn.q_proj.bias +0 -0
  17. weights/decoder.layers.0.self_attn.q_proj.weight +3 -0
  18. weights/decoder.layers.0.self_attn.v_proj.bias +0 -0
  19. weights/decoder.layers.0.self_attn.v_proj.weight +3 -0
  20. weights/decoder.layers.0.self_attn_layer_norm.bias +0 -0
  21. weights/decoder.layers.0.self_attn_layer_norm.weight +0 -0
  22. weights/decoder.layers.1.fc1.bias +0 -0
  23. weights/decoder.layers.1.fc1.weight +3 -0
  24. weights/decoder.layers.1.fc2.bias +0 -0
  25. weights/decoder.layers.1.fc2.weight +3 -0
  26. weights/decoder.layers.1.final_layer_norm.bias +0 -0
  27. weights/decoder.layers.1.final_layer_norm.weight +0 -0
  28. weights/decoder.layers.1.self_attn.k_proj.bias +0 -0
  29. weights/decoder.layers.1.self_attn.k_proj.weight +3 -0
  30. weights/decoder.layers.1.self_attn.out_proj.bias +0 -0
  31. weights/decoder.layers.1.self_attn.out_proj.weight +3 -0
  32. weights/decoder.layers.1.self_attn.q_proj.bias +0 -0
  33. weights/decoder.layers.1.self_attn.q_proj.weight +3 -0
  34. weights/decoder.layers.1.self_attn.v_proj.bias +0 -0
  35. weights/decoder.layers.1.self_attn.v_proj.weight +3 -0
  36. weights/decoder.layers.1.self_attn_layer_norm.bias +0 -0
  37. weights/decoder.layers.1.self_attn_layer_norm.weight +0 -0
  38. weights/decoder.layers.10.fc1.bias +0 -0
  39. weights/decoder.layers.10.fc1.weight +3 -0
  40. weights/decoder.layers.10.fc2.bias +0 -0
  41. weights/decoder.layers.10.fc2.weight +3 -0
  42. weights/decoder.layers.10.final_layer_norm.bias +0 -0
  43. weights/decoder.layers.10.final_layer_norm.weight +0 -0
  44. weights/decoder.layers.10.self_attn.k_proj.bias +0 -0
  45. weights/decoder.layers.10.self_attn.k_proj.weight +3 -0
  46. weights/decoder.layers.10.self_attn.out_proj.bias +0 -0
  47. weights/decoder.layers.10.self_attn.out_proj.weight +3 -0
  48. weights/decoder.layers.10.self_attn.q_proj.bias +0 -0
  49. weights/decoder.layers.10.self_attn.q_proj.weight +3 -0
  50. weights/decoder.layers.10.self_attn.v_proj.bias +0 -0
.gitattributes CHANGED
@@ -32,3 +32,581 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
32
  *.zip filter=lfs diff=lfs merge=lfs -text
33
  *.zst filter=lfs diff=lfs merge=lfs -text
34
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
32
  *.zip filter=lfs diff=lfs merge=lfs -text
33
  *.zst filter=lfs diff=lfs merge=lfs -text
34
  *tfevents* filter=lfs diff=lfs merge=lfs -text
35
+ weights/decoder.embed_positions.weight filter=lfs diff=lfs merge=lfs -text
36
+ weights/decoder.embed_tokens.weight filter=lfs diff=lfs merge=lfs -text
37
+ weights/decoder.layers.0.fc1.weight filter=lfs diff=lfs merge=lfs -text
38
+ weights/decoder.layers.0.fc2.weight filter=lfs diff=lfs merge=lfs -text
39
+ weights/decoder.layers.0.self_attn.k_proj.weight filter=lfs diff=lfs merge=lfs -text
40
+ weights/decoder.layers.0.self_attn.out_proj.weight filter=lfs diff=lfs merge=lfs -text
41
+ weights/decoder.layers.0.self_attn.q_proj.weight filter=lfs diff=lfs merge=lfs -text
42
+ weights/decoder.layers.0.self_attn.v_proj.weight filter=lfs diff=lfs merge=lfs -text
43
+ weights/decoder.layers.1.fc1.weight filter=lfs diff=lfs merge=lfs -text
44
+ weights/decoder.layers.1.fc2.weight filter=lfs diff=lfs merge=lfs -text
45
+ weights/decoder.layers.1.self_attn.k_proj.weight filter=lfs diff=lfs merge=lfs -text
46
+ weights/decoder.layers.1.self_attn.out_proj.weight filter=lfs diff=lfs merge=lfs -text
47
+ weights/decoder.layers.1.self_attn.q_proj.weight filter=lfs diff=lfs merge=lfs -text
48
+ weights/decoder.layers.1.self_attn.v_proj.weight filter=lfs diff=lfs merge=lfs -text
49
+ weights/decoder.layers.10.fc1.weight filter=lfs diff=lfs merge=lfs -text
50
+ weights/decoder.layers.10.fc2.weight filter=lfs diff=lfs merge=lfs -text
51
+ weights/decoder.layers.10.self_attn.k_proj.weight filter=lfs diff=lfs merge=lfs -text
52
+ weights/decoder.layers.10.self_attn.out_proj.weight filter=lfs diff=lfs merge=lfs -text
53
+ weights/decoder.layers.10.self_attn.q_proj.weight filter=lfs diff=lfs merge=lfs -text
54
+ weights/decoder.layers.10.self_attn.v_proj.weight filter=lfs diff=lfs merge=lfs -text
55
+ weights/decoder.layers.11.fc1.weight filter=lfs diff=lfs merge=lfs -text
56
+ weights/decoder.layers.11.fc2.weight filter=lfs diff=lfs merge=lfs -text
57
+ weights/decoder.layers.11.self_attn.k_proj.weight filter=lfs diff=lfs merge=lfs -text
58
+ weights/decoder.layers.11.self_attn.out_proj.weight filter=lfs diff=lfs merge=lfs -text
59
+ weights/decoder.layers.11.self_attn.q_proj.weight filter=lfs diff=lfs merge=lfs -text
60
+ weights/decoder.layers.11.self_attn.v_proj.weight filter=lfs diff=lfs merge=lfs -text
61
+ weights/decoder.layers.12.fc1.weight filter=lfs diff=lfs merge=lfs -text
62
+ weights/decoder.layers.12.fc2.weight filter=lfs diff=lfs merge=lfs -text
63
+ weights/decoder.layers.12.self_attn.k_proj.weight filter=lfs diff=lfs merge=lfs -text
64
+ weights/decoder.layers.12.self_attn.out_proj.weight filter=lfs diff=lfs merge=lfs -text
65
+ weights/decoder.layers.12.self_attn.q_proj.weight filter=lfs diff=lfs merge=lfs -text
66
+ weights/decoder.layers.12.self_attn.v_proj.weight filter=lfs diff=lfs merge=lfs -text
67
+ weights/decoder.layers.13.fc1.weight filter=lfs diff=lfs merge=lfs -text
68
+ weights/decoder.layers.13.fc2.weight filter=lfs diff=lfs merge=lfs -text
69
+ weights/decoder.layers.13.self_attn.k_proj.weight filter=lfs diff=lfs merge=lfs -text
70
+ weights/decoder.layers.13.self_attn.out_proj.weight filter=lfs diff=lfs merge=lfs -text
71
+ weights/decoder.layers.13.self_attn.q_proj.weight filter=lfs diff=lfs merge=lfs -text
72
+ weights/decoder.layers.13.self_attn.v_proj.weight filter=lfs diff=lfs merge=lfs -text
73
+ weights/decoder.layers.14.fc1.weight filter=lfs diff=lfs merge=lfs -text
74
+ weights/decoder.layers.14.fc2.weight filter=lfs diff=lfs merge=lfs -text
75
+ weights/decoder.layers.14.self_attn.k_proj.weight filter=lfs diff=lfs merge=lfs -text
76
+ weights/decoder.layers.14.self_attn.out_proj.weight filter=lfs diff=lfs merge=lfs -text
77
+ weights/decoder.layers.14.self_attn.q_proj.weight filter=lfs diff=lfs merge=lfs -text
78
+ weights/decoder.layers.14.self_attn.v_proj.weight filter=lfs diff=lfs merge=lfs -text
79
+ weights/decoder.layers.15.fc1.weight filter=lfs diff=lfs merge=lfs -text
80
+ weights/decoder.layers.15.fc2.weight filter=lfs diff=lfs merge=lfs -text
81
+ weights/decoder.layers.15.self_attn.k_proj.weight filter=lfs diff=lfs merge=lfs -text
82
+ weights/decoder.layers.15.self_attn.out_proj.weight filter=lfs diff=lfs merge=lfs -text
83
+ weights/decoder.layers.15.self_attn.q_proj.weight filter=lfs diff=lfs merge=lfs -text
84
+ weights/decoder.layers.15.self_attn.v_proj.weight filter=lfs diff=lfs merge=lfs -text
85
+ weights/decoder.layers.16.fc1.weight filter=lfs diff=lfs merge=lfs -text
86
+ weights/decoder.layers.16.fc2.weight filter=lfs diff=lfs merge=lfs -text
87
+ weights/decoder.layers.16.self_attn.k_proj.weight filter=lfs diff=lfs merge=lfs -text
88
+ weights/decoder.layers.16.self_attn.out_proj.weight filter=lfs diff=lfs merge=lfs -text
89
+ weights/decoder.layers.16.self_attn.q_proj.weight filter=lfs diff=lfs merge=lfs -text
90
+ weights/decoder.layers.16.self_attn.v_proj.weight filter=lfs diff=lfs merge=lfs -text
91
+ weights/decoder.layers.17.fc1.weight filter=lfs diff=lfs merge=lfs -text
92
+ weights/decoder.layers.17.fc2.weight filter=lfs diff=lfs merge=lfs -text
93
+ weights/decoder.layers.17.self_attn.k_proj.weight filter=lfs diff=lfs merge=lfs -text
94
+ weights/decoder.layers.17.self_attn.out_proj.weight filter=lfs diff=lfs merge=lfs -text
95
+ weights/decoder.layers.17.self_attn.q_proj.weight filter=lfs diff=lfs merge=lfs -text
96
+ weights/decoder.layers.17.self_attn.v_proj.weight filter=lfs diff=lfs merge=lfs -text
97
+ weights/decoder.layers.18.fc1.weight filter=lfs diff=lfs merge=lfs -text
98
+ weights/decoder.layers.18.fc2.weight filter=lfs diff=lfs merge=lfs -text
99
+ weights/decoder.layers.18.self_attn.k_proj.weight filter=lfs diff=lfs merge=lfs -text
100
+ weights/decoder.layers.18.self_attn.out_proj.weight filter=lfs diff=lfs merge=lfs -text
101
+ weights/decoder.layers.18.self_attn.q_proj.weight filter=lfs diff=lfs merge=lfs -text
102
+ weights/decoder.layers.18.self_attn.v_proj.weight filter=lfs diff=lfs merge=lfs -text
103
+ weights/decoder.layers.19.fc1.weight filter=lfs diff=lfs merge=lfs -text
104
+ weights/decoder.layers.19.fc2.weight filter=lfs diff=lfs merge=lfs -text
105
+ weights/decoder.layers.19.self_attn.k_proj.weight filter=lfs diff=lfs merge=lfs -text
106
+ weights/decoder.layers.19.self_attn.out_proj.weight filter=lfs diff=lfs merge=lfs -text
107
+ weights/decoder.layers.19.self_attn.q_proj.weight filter=lfs diff=lfs merge=lfs -text
108
+ weights/decoder.layers.19.self_attn.v_proj.weight filter=lfs diff=lfs merge=lfs -text
109
+ weights/decoder.layers.2.fc1.weight filter=lfs diff=lfs merge=lfs -text
110
+ weights/decoder.layers.2.fc2.weight filter=lfs diff=lfs merge=lfs -text
111
+ weights/decoder.layers.2.self_attn.k_proj.weight filter=lfs diff=lfs merge=lfs -text
112
+ weights/decoder.layers.2.self_attn.out_proj.weight filter=lfs diff=lfs merge=lfs -text
113
+ weights/decoder.layers.2.self_attn.q_proj.weight filter=lfs diff=lfs merge=lfs -text
114
+ weights/decoder.layers.2.self_attn.v_proj.weight filter=lfs diff=lfs merge=lfs -text
115
+ weights/decoder.layers.20.fc1.weight filter=lfs diff=lfs merge=lfs -text
116
+ weights/decoder.layers.20.fc2.weight filter=lfs diff=lfs merge=lfs -text
117
+ weights/decoder.layers.20.self_attn.k_proj.weight filter=lfs diff=lfs merge=lfs -text
118
+ weights/decoder.layers.20.self_attn.out_proj.weight filter=lfs diff=lfs merge=lfs -text
119
+ weights/decoder.layers.20.self_attn.q_proj.weight filter=lfs diff=lfs merge=lfs -text
120
+ weights/decoder.layers.20.self_attn.v_proj.weight filter=lfs diff=lfs merge=lfs -text
121
+ weights/decoder.layers.21.fc1.weight filter=lfs diff=lfs merge=lfs -text
122
+ weights/decoder.layers.21.fc2.weight filter=lfs diff=lfs merge=lfs -text
123
+ weights/decoder.layers.21.self_attn.k_proj.weight filter=lfs diff=lfs merge=lfs -text
124
+ weights/decoder.layers.21.self_attn.out_proj.weight filter=lfs diff=lfs merge=lfs -text
125
+ weights/decoder.layers.21.self_attn.q_proj.weight filter=lfs diff=lfs merge=lfs -text
126
+ weights/decoder.layers.21.self_attn.v_proj.weight filter=lfs diff=lfs merge=lfs -text
127
+ weights/decoder.layers.22.fc1.weight filter=lfs diff=lfs merge=lfs -text
128
+ weights/decoder.layers.22.fc2.weight filter=lfs diff=lfs merge=lfs -text
129
+ weights/decoder.layers.22.self_attn.k_proj.weight filter=lfs diff=lfs merge=lfs -text
130
+ weights/decoder.layers.22.self_attn.out_proj.weight filter=lfs diff=lfs merge=lfs -text
131
+ weights/decoder.layers.22.self_attn.q_proj.weight filter=lfs diff=lfs merge=lfs -text
132
+ weights/decoder.layers.22.self_attn.v_proj.weight filter=lfs diff=lfs merge=lfs -text
133
+ weights/decoder.layers.23.fc1.weight filter=lfs diff=lfs merge=lfs -text
134
+ weights/decoder.layers.23.fc2.weight filter=lfs diff=lfs merge=lfs -text
135
+ weights/decoder.layers.23.self_attn.k_proj.weight filter=lfs diff=lfs merge=lfs -text
136
+ weights/decoder.layers.23.self_attn.out_proj.weight filter=lfs diff=lfs merge=lfs -text
137
+ weights/decoder.layers.23.self_attn.q_proj.weight filter=lfs diff=lfs merge=lfs -text
138
+ weights/decoder.layers.23.self_attn.v_proj.weight filter=lfs diff=lfs merge=lfs -text
139
+ weights/decoder.layers.24.fc1.weight filter=lfs diff=lfs merge=lfs -text
140
+ weights/decoder.layers.24.fc2.weight filter=lfs diff=lfs merge=lfs -text
141
+ weights/decoder.layers.24.self_attn.k_proj.weight filter=lfs diff=lfs merge=lfs -text
142
+ weights/decoder.layers.24.self_attn.out_proj.weight filter=lfs diff=lfs merge=lfs -text
143
+ weights/decoder.layers.24.self_attn.q_proj.weight filter=lfs diff=lfs merge=lfs -text
144
+ weights/decoder.layers.24.self_attn.v_proj.weight filter=lfs diff=lfs merge=lfs -text
145
+ weights/decoder.layers.25.fc1.weight filter=lfs diff=lfs merge=lfs -text
146
+ weights/decoder.layers.25.fc2.weight filter=lfs diff=lfs merge=lfs -text
147
+ weights/decoder.layers.25.self_attn.k_proj.weight filter=lfs diff=lfs merge=lfs -text
148
+ weights/decoder.layers.25.self_attn.out_proj.weight filter=lfs diff=lfs merge=lfs -text
149
+ weights/decoder.layers.25.self_attn.q_proj.weight filter=lfs diff=lfs merge=lfs -text
150
+ weights/decoder.layers.25.self_attn.v_proj.weight filter=lfs diff=lfs merge=lfs -text
151
+ weights/decoder.layers.26.fc1.weight filter=lfs diff=lfs merge=lfs -text
152
+ weights/decoder.layers.26.fc2.weight filter=lfs diff=lfs merge=lfs -text
153
+ weights/decoder.layers.26.self_attn.k_proj.weight filter=lfs diff=lfs merge=lfs -text
154
+ weights/decoder.layers.26.self_attn.out_proj.weight filter=lfs diff=lfs merge=lfs -text
155
+ weights/decoder.layers.26.self_attn.q_proj.weight filter=lfs diff=lfs merge=lfs -text
156
+ weights/decoder.layers.26.self_attn.v_proj.weight filter=lfs diff=lfs merge=lfs -text
157
+ weights/decoder.layers.27.fc1.weight filter=lfs diff=lfs merge=lfs -text
158
+ weights/decoder.layers.27.fc2.weight filter=lfs diff=lfs merge=lfs -text
159
+ weights/decoder.layers.27.self_attn.k_proj.weight filter=lfs diff=lfs merge=lfs -text
160
+ weights/decoder.layers.27.self_attn.out_proj.weight filter=lfs diff=lfs merge=lfs -text
161
+ weights/decoder.layers.27.self_attn.q_proj.weight filter=lfs diff=lfs merge=lfs -text
162
+ weights/decoder.layers.27.self_attn.v_proj.weight filter=lfs diff=lfs merge=lfs -text
163
+ weights/decoder.layers.28.fc1.weight filter=lfs diff=lfs merge=lfs -text
164
+ weights/decoder.layers.28.fc2.weight filter=lfs diff=lfs merge=lfs -text
165
+ weights/decoder.layers.28.self_attn.k_proj.weight filter=lfs diff=lfs merge=lfs -text
166
+ weights/decoder.layers.28.self_attn.out_proj.weight filter=lfs diff=lfs merge=lfs -text
167
+ weights/decoder.layers.28.self_attn.q_proj.weight filter=lfs diff=lfs merge=lfs -text
168
+ weights/decoder.layers.28.self_attn.v_proj.weight filter=lfs diff=lfs merge=lfs -text
169
+ weights/decoder.layers.29.fc1.weight filter=lfs diff=lfs merge=lfs -text
170
+ weights/decoder.layers.29.fc2.weight filter=lfs diff=lfs merge=lfs -text
171
+ weights/decoder.layers.29.self_attn.k_proj.weight filter=lfs diff=lfs merge=lfs -text
172
+ weights/decoder.layers.29.self_attn.out_proj.weight filter=lfs diff=lfs merge=lfs -text
173
+ weights/decoder.layers.29.self_attn.q_proj.weight filter=lfs diff=lfs merge=lfs -text
174
+ weights/decoder.layers.29.self_attn.v_proj.weight filter=lfs diff=lfs merge=lfs -text
175
+ weights/decoder.layers.3.fc1.weight filter=lfs diff=lfs merge=lfs -text
176
+ weights/decoder.layers.3.fc2.weight filter=lfs diff=lfs merge=lfs -text
177
+ weights/decoder.layers.3.self_attn.k_proj.weight filter=lfs diff=lfs merge=lfs -text
178
+ weights/decoder.layers.3.self_attn.out_proj.weight filter=lfs diff=lfs merge=lfs -text
179
+ weights/decoder.layers.3.self_attn.q_proj.weight filter=lfs diff=lfs merge=lfs -text
180
+ weights/decoder.layers.3.self_attn.v_proj.weight filter=lfs diff=lfs merge=lfs -text
181
+ weights/decoder.layers.30.fc1.weight filter=lfs diff=lfs merge=lfs -text
182
+ weights/decoder.layers.30.fc2.weight filter=lfs diff=lfs merge=lfs -text
183
+ weights/decoder.layers.30.self_attn.k_proj.weight filter=lfs diff=lfs merge=lfs -text
184
+ weights/decoder.layers.30.self_attn.out_proj.weight filter=lfs diff=lfs merge=lfs -text
185
+ weights/decoder.layers.30.self_attn.q_proj.weight filter=lfs diff=lfs merge=lfs -text
186
+ weights/decoder.layers.30.self_attn.v_proj.weight filter=lfs diff=lfs merge=lfs -text
187
+ weights/decoder.layers.31.fc1.weight filter=lfs diff=lfs merge=lfs -text
188
+ weights/decoder.layers.31.fc2.weight filter=lfs diff=lfs merge=lfs -text
189
+ weights/decoder.layers.31.self_attn.k_proj.weight filter=lfs diff=lfs merge=lfs -text
190
+ weights/decoder.layers.31.self_attn.out_proj.weight filter=lfs diff=lfs merge=lfs -text
191
+ weights/decoder.layers.31.self_attn.q_proj.weight filter=lfs diff=lfs merge=lfs -text
192
+ weights/decoder.layers.31.self_attn.v_proj.weight filter=lfs diff=lfs merge=lfs -text
193
+ weights/decoder.layers.32.fc1.weight filter=lfs diff=lfs merge=lfs -text
194
+ weights/decoder.layers.32.fc2.weight filter=lfs diff=lfs merge=lfs -text
195
+ weights/decoder.layers.32.self_attn.k_proj.weight filter=lfs diff=lfs merge=lfs -text
196
+ weights/decoder.layers.32.self_attn.out_proj.weight filter=lfs diff=lfs merge=lfs -text
197
+ weights/decoder.layers.32.self_attn.q_proj.weight filter=lfs diff=lfs merge=lfs -text
198
+ weights/decoder.layers.32.self_attn.v_proj.weight filter=lfs diff=lfs merge=lfs -text
199
+ weights/decoder.layers.33.fc1.weight filter=lfs diff=lfs merge=lfs -text
200
+ weights/decoder.layers.33.fc2.weight filter=lfs diff=lfs merge=lfs -text
201
+ weights/decoder.layers.33.self_attn.k_proj.weight filter=lfs diff=lfs merge=lfs -text
202
+ weights/decoder.layers.33.self_attn.out_proj.weight filter=lfs diff=lfs merge=lfs -text
203
+ weights/decoder.layers.33.self_attn.q_proj.weight filter=lfs diff=lfs merge=lfs -text
204
+ weights/decoder.layers.33.self_attn.v_proj.weight filter=lfs diff=lfs merge=lfs -text
205
+ weights/decoder.layers.34.fc1.weight filter=lfs diff=lfs merge=lfs -text
206
+ weights/decoder.layers.34.fc2.weight filter=lfs diff=lfs merge=lfs -text
207
+ weights/decoder.layers.34.self_attn.k_proj.weight filter=lfs diff=lfs merge=lfs -text
208
+ weights/decoder.layers.34.self_attn.out_proj.weight filter=lfs diff=lfs merge=lfs -text
209
+ weights/decoder.layers.34.self_attn.q_proj.weight filter=lfs diff=lfs merge=lfs -text
210
+ weights/decoder.layers.34.self_attn.v_proj.weight filter=lfs diff=lfs merge=lfs -text
211
+ weights/decoder.layers.35.fc1.weight filter=lfs diff=lfs merge=lfs -text
212
+ weights/decoder.layers.35.fc2.weight filter=lfs diff=lfs merge=lfs -text
213
+ weights/decoder.layers.35.self_attn.k_proj.weight filter=lfs diff=lfs merge=lfs -text
214
+ weights/decoder.layers.35.self_attn.out_proj.weight filter=lfs diff=lfs merge=lfs -text
215
+ weights/decoder.layers.35.self_attn.q_proj.weight filter=lfs diff=lfs merge=lfs -text
216
+ weights/decoder.layers.35.self_attn.v_proj.weight filter=lfs diff=lfs merge=lfs -text
217
+ weights/decoder.layers.36.fc1.weight filter=lfs diff=lfs merge=lfs -text
218
+ weights/decoder.layers.36.fc2.weight filter=lfs diff=lfs merge=lfs -text
219
+ weights/decoder.layers.36.self_attn.k_proj.weight filter=lfs diff=lfs merge=lfs -text
220
+ weights/decoder.layers.36.self_attn.out_proj.weight filter=lfs diff=lfs merge=lfs -text
221
+ weights/decoder.layers.36.self_attn.q_proj.weight filter=lfs diff=lfs merge=lfs -text
222
+ weights/decoder.layers.36.self_attn.v_proj.weight filter=lfs diff=lfs merge=lfs -text
223
+ weights/decoder.layers.37.fc1.weight filter=lfs diff=lfs merge=lfs -text
224
+ weights/decoder.layers.37.fc2.weight filter=lfs diff=lfs merge=lfs -text
225
+ weights/decoder.layers.37.self_attn.k_proj.weight filter=lfs diff=lfs merge=lfs -text
226
+ weights/decoder.layers.37.self_attn.out_proj.weight filter=lfs diff=lfs merge=lfs -text
227
+ weights/decoder.layers.37.self_attn.q_proj.weight filter=lfs diff=lfs merge=lfs -text
228
+ weights/decoder.layers.37.self_attn.v_proj.weight filter=lfs diff=lfs merge=lfs -text
229
+ weights/decoder.layers.38.fc1.weight filter=lfs diff=lfs merge=lfs -text
230
+ weights/decoder.layers.38.fc2.weight filter=lfs diff=lfs merge=lfs -text
231
+ weights/decoder.layers.38.self_attn.k_proj.weight filter=lfs diff=lfs merge=lfs -text
232
+ weights/decoder.layers.38.self_attn.out_proj.weight filter=lfs diff=lfs merge=lfs -text
233
+ weights/decoder.layers.38.self_attn.q_proj.weight filter=lfs diff=lfs merge=lfs -text
234
+ weights/decoder.layers.38.self_attn.v_proj.weight filter=lfs diff=lfs merge=lfs -text
235
+ weights/decoder.layers.39.fc1.weight filter=lfs diff=lfs merge=lfs -text
236
+ weights/decoder.layers.39.fc2.weight filter=lfs diff=lfs merge=lfs -text
237
+ weights/decoder.layers.39.self_attn.k_proj.weight filter=lfs diff=lfs merge=lfs -text
238
+ weights/decoder.layers.39.self_attn.out_proj.weight filter=lfs diff=lfs merge=lfs -text
239
+ weights/decoder.layers.39.self_attn.q_proj.weight filter=lfs diff=lfs merge=lfs -text
240
+ weights/decoder.layers.39.self_attn.v_proj.weight filter=lfs diff=lfs merge=lfs -text
241
+ weights/decoder.layers.4.fc1.weight filter=lfs diff=lfs merge=lfs -text
242
+ weights/decoder.layers.4.fc2.weight filter=lfs diff=lfs merge=lfs -text
243
+ weights/decoder.layers.4.self_attn.k_proj.weight filter=lfs diff=lfs merge=lfs -text
244
+ weights/decoder.layers.4.self_attn.out_proj.weight filter=lfs diff=lfs merge=lfs -text
245
+ weights/decoder.layers.4.self_attn.q_proj.weight filter=lfs diff=lfs merge=lfs -text
246
+ weights/decoder.layers.4.self_attn.v_proj.weight filter=lfs diff=lfs merge=lfs -text
247
+ weights/decoder.layers.40.fc1.weight filter=lfs diff=lfs merge=lfs -text
248
+ weights/decoder.layers.40.fc2.weight filter=lfs diff=lfs merge=lfs -text
249
+ weights/decoder.layers.40.self_attn.k_proj.weight filter=lfs diff=lfs merge=lfs -text
250
+ weights/decoder.layers.40.self_attn.out_proj.weight filter=lfs diff=lfs merge=lfs -text
251
+ weights/decoder.layers.40.self_attn.q_proj.weight filter=lfs diff=lfs merge=lfs -text
252
+ weights/decoder.layers.40.self_attn.v_proj.weight filter=lfs diff=lfs merge=lfs -text
253
+ weights/decoder.layers.41.fc1.weight filter=lfs diff=lfs merge=lfs -text
254
+ weights/decoder.layers.41.fc2.weight filter=lfs diff=lfs merge=lfs -text
255
+ weights/decoder.layers.41.self_attn.k_proj.weight filter=lfs diff=lfs merge=lfs -text
256
+ weights/decoder.layers.41.self_attn.out_proj.weight filter=lfs diff=lfs merge=lfs -text
257
+ weights/decoder.layers.41.self_attn.q_proj.weight filter=lfs diff=lfs merge=lfs -text
258
+ weights/decoder.layers.41.self_attn.v_proj.weight filter=lfs diff=lfs merge=lfs -text
259
+ weights/decoder.layers.42.fc1.weight filter=lfs diff=lfs merge=lfs -text
260
+ weights/decoder.layers.42.fc2.weight filter=lfs diff=lfs merge=lfs -text
261
+ weights/decoder.layers.42.self_attn.k_proj.weight filter=lfs diff=lfs merge=lfs -text
262
+ weights/decoder.layers.42.self_attn.out_proj.weight filter=lfs diff=lfs merge=lfs -text
263
+ weights/decoder.layers.42.self_attn.q_proj.weight filter=lfs diff=lfs merge=lfs -text
264
+ weights/decoder.layers.42.self_attn.v_proj.weight filter=lfs diff=lfs merge=lfs -text
265
+ weights/decoder.layers.43.fc1.weight filter=lfs diff=lfs merge=lfs -text
266
+ weights/decoder.layers.43.fc2.weight filter=lfs diff=lfs merge=lfs -text
267
+ weights/decoder.layers.43.self_attn.k_proj.weight filter=lfs diff=lfs merge=lfs -text
268
+ weights/decoder.layers.43.self_attn.out_proj.weight filter=lfs diff=lfs merge=lfs -text
269
+ weights/decoder.layers.43.self_attn.q_proj.weight filter=lfs diff=lfs merge=lfs -text
270
+ weights/decoder.layers.43.self_attn.v_proj.weight filter=lfs diff=lfs merge=lfs -text
271
+ weights/decoder.layers.44.fc1.weight filter=lfs diff=lfs merge=lfs -text
272
+ weights/decoder.layers.44.fc2.weight filter=lfs diff=lfs merge=lfs -text
273
+ weights/decoder.layers.44.self_attn.k_proj.weight filter=lfs diff=lfs merge=lfs -text
274
+ weights/decoder.layers.44.self_attn.out_proj.weight filter=lfs diff=lfs merge=lfs -text
275
+ weights/decoder.layers.44.self_attn.q_proj.weight filter=lfs diff=lfs merge=lfs -text
276
+ weights/decoder.layers.44.self_attn.v_proj.weight filter=lfs diff=lfs merge=lfs -text
277
+ weights/decoder.layers.45.fc1.weight filter=lfs diff=lfs merge=lfs -text
278
+ weights/decoder.layers.45.fc2.weight filter=lfs diff=lfs merge=lfs -text
279
+ weights/decoder.layers.45.self_attn.k_proj.weight filter=lfs diff=lfs merge=lfs -text
280
+ weights/decoder.layers.45.self_attn.out_proj.weight filter=lfs diff=lfs merge=lfs -text
281
+ weights/decoder.layers.45.self_attn.q_proj.weight filter=lfs diff=lfs merge=lfs -text
282
+ weights/decoder.layers.45.self_attn.v_proj.weight filter=lfs diff=lfs merge=lfs -text
283
+ weights/decoder.layers.46.fc1.weight filter=lfs diff=lfs merge=lfs -text
284
+ weights/decoder.layers.46.fc2.weight filter=lfs diff=lfs merge=lfs -text
285
+ weights/decoder.layers.46.self_attn.k_proj.weight filter=lfs diff=lfs merge=lfs -text
286
+ weights/decoder.layers.46.self_attn.out_proj.weight filter=lfs diff=lfs merge=lfs -text
287
+ weights/decoder.layers.46.self_attn.q_proj.weight filter=lfs diff=lfs merge=lfs -text
288
+ weights/decoder.layers.46.self_attn.v_proj.weight filter=lfs diff=lfs merge=lfs -text
289
+ weights/decoder.layers.47.fc1.weight filter=lfs diff=lfs merge=lfs -text
290
+ weights/decoder.layers.47.fc2.weight filter=lfs diff=lfs merge=lfs -text
291
+ weights/decoder.layers.47.self_attn.k_proj.weight filter=lfs diff=lfs merge=lfs -text
292
+ weights/decoder.layers.47.self_attn.out_proj.weight filter=lfs diff=lfs merge=lfs -text
293
+ weights/decoder.layers.47.self_attn.q_proj.weight filter=lfs diff=lfs merge=lfs -text
294
+ weights/decoder.layers.47.self_attn.v_proj.weight filter=lfs diff=lfs merge=lfs -text
295
+ weights/decoder.layers.48.fc1.weight filter=lfs diff=lfs merge=lfs -text
296
+ weights/decoder.layers.48.fc2.weight filter=lfs diff=lfs merge=lfs -text
297
+ weights/decoder.layers.48.self_attn.k_proj.weight filter=lfs diff=lfs merge=lfs -text
298
+ weights/decoder.layers.48.self_attn.out_proj.weight filter=lfs diff=lfs merge=lfs -text
299
+ weights/decoder.layers.48.self_attn.q_proj.weight filter=lfs diff=lfs merge=lfs -text
300
+ weights/decoder.layers.48.self_attn.v_proj.weight filter=lfs diff=lfs merge=lfs -text
301
+ weights/decoder.layers.49.fc1.weight filter=lfs diff=lfs merge=lfs -text
302
+ weights/decoder.layers.49.fc2.weight filter=lfs diff=lfs merge=lfs -text
303
+ weights/decoder.layers.49.self_attn.k_proj.weight filter=lfs diff=lfs merge=lfs -text
304
+ weights/decoder.layers.49.self_attn.out_proj.weight filter=lfs diff=lfs merge=lfs -text
305
+ weights/decoder.layers.49.self_attn.q_proj.weight filter=lfs diff=lfs merge=lfs -text
306
+ weights/decoder.layers.49.self_attn.v_proj.weight filter=lfs diff=lfs merge=lfs -text
307
+ weights/decoder.layers.5.fc1.weight filter=lfs diff=lfs merge=lfs -text
308
+ weights/decoder.layers.5.fc2.weight filter=lfs diff=lfs merge=lfs -text
309
+ weights/decoder.layers.5.self_attn.k_proj.weight filter=lfs diff=lfs merge=lfs -text
310
+ weights/decoder.layers.5.self_attn.out_proj.weight filter=lfs diff=lfs merge=lfs -text
311
+ weights/decoder.layers.5.self_attn.q_proj.weight filter=lfs diff=lfs merge=lfs -text
312
+ weights/decoder.layers.5.self_attn.v_proj.weight filter=lfs diff=lfs merge=lfs -text
313
+ weights/decoder.layers.50.fc1.weight filter=lfs diff=lfs merge=lfs -text
314
+ weights/decoder.layers.50.fc2.weight filter=lfs diff=lfs merge=lfs -text
315
+ weights/decoder.layers.50.self_attn.k_proj.weight filter=lfs diff=lfs merge=lfs -text
316
+ weights/decoder.layers.50.self_attn.out_proj.weight filter=lfs diff=lfs merge=lfs -text
317
+ weights/decoder.layers.50.self_attn.q_proj.weight filter=lfs diff=lfs merge=lfs -text
318
+ weights/decoder.layers.50.self_attn.v_proj.weight filter=lfs diff=lfs merge=lfs -text
319
+ weights/decoder.layers.51.fc1.weight filter=lfs diff=lfs merge=lfs -text
320
+ weights/decoder.layers.51.fc2.weight filter=lfs diff=lfs merge=lfs -text
321
+ weights/decoder.layers.51.self_attn.k_proj.weight filter=lfs diff=lfs merge=lfs -text
322
+ weights/decoder.layers.51.self_attn.out_proj.weight filter=lfs diff=lfs merge=lfs -text
323
+ weights/decoder.layers.51.self_attn.q_proj.weight filter=lfs diff=lfs merge=lfs -text
324
+ weights/decoder.layers.51.self_attn.v_proj.weight filter=lfs diff=lfs merge=lfs -text
325
+ weights/decoder.layers.52.fc1.weight filter=lfs diff=lfs merge=lfs -text
326
+ weights/decoder.layers.52.fc2.weight filter=lfs diff=lfs merge=lfs -text
327
+ weights/decoder.layers.52.self_attn.k_proj.weight filter=lfs diff=lfs merge=lfs -text
328
+ weights/decoder.layers.52.self_attn.out_proj.weight filter=lfs diff=lfs merge=lfs -text
329
+ weights/decoder.layers.52.self_attn.q_proj.weight filter=lfs diff=lfs merge=lfs -text
330
+ weights/decoder.layers.52.self_attn.v_proj.weight filter=lfs diff=lfs merge=lfs -text
331
+ weights/decoder.layers.53.fc1.weight filter=lfs diff=lfs merge=lfs -text
332
+ weights/decoder.layers.53.fc2.weight filter=lfs diff=lfs merge=lfs -text
333
+ weights/decoder.layers.53.self_attn.k_proj.weight filter=lfs diff=lfs merge=lfs -text
334
+ weights/decoder.layers.53.self_attn.out_proj.weight filter=lfs diff=lfs merge=lfs -text
335
+ weights/decoder.layers.53.self_attn.q_proj.weight filter=lfs diff=lfs merge=lfs -text
336
+ weights/decoder.layers.53.self_attn.v_proj.weight filter=lfs diff=lfs merge=lfs -text
337
+ weights/decoder.layers.54.fc1.weight filter=lfs diff=lfs merge=lfs -text
338
+ weights/decoder.layers.54.fc2.weight filter=lfs diff=lfs merge=lfs -text
339
+ weights/decoder.layers.54.self_attn.k_proj.weight filter=lfs diff=lfs merge=lfs -text
340
+ weights/decoder.layers.54.self_attn.out_proj.weight filter=lfs diff=lfs merge=lfs -text
341
+ weights/decoder.layers.54.self_attn.q_proj.weight filter=lfs diff=lfs merge=lfs -text
342
+ weights/decoder.layers.54.self_attn.v_proj.weight filter=lfs diff=lfs merge=lfs -text
343
+ weights/decoder.layers.55.fc1.weight filter=lfs diff=lfs merge=lfs -text
344
+ weights/decoder.layers.55.fc2.weight filter=lfs diff=lfs merge=lfs -text
345
+ weights/decoder.layers.55.self_attn.k_proj.weight filter=lfs diff=lfs merge=lfs -text
346
+ weights/decoder.layers.55.self_attn.out_proj.weight filter=lfs diff=lfs merge=lfs -text
347
+ weights/decoder.layers.55.self_attn.q_proj.weight filter=lfs diff=lfs merge=lfs -text
348
+ weights/decoder.layers.55.self_attn.v_proj.weight filter=lfs diff=lfs merge=lfs -text
349
+ weights/decoder.layers.56.fc1.weight filter=lfs diff=lfs merge=lfs -text
350
+ weights/decoder.layers.56.fc2.weight filter=lfs diff=lfs merge=lfs -text
351
+ weights/decoder.layers.56.self_attn.k_proj.weight filter=lfs diff=lfs merge=lfs -text
352
+ weights/decoder.layers.56.self_attn.out_proj.weight filter=lfs diff=lfs merge=lfs -text
353
+ weights/decoder.layers.56.self_attn.q_proj.weight filter=lfs diff=lfs merge=lfs -text
354
+ weights/decoder.layers.56.self_attn.v_proj.weight filter=lfs diff=lfs merge=lfs -text
355
+ weights/decoder.layers.57.fc1.weight filter=lfs diff=lfs merge=lfs -text
356
+ weights/decoder.layers.57.fc2.weight filter=lfs diff=lfs merge=lfs -text
357
+ weights/decoder.layers.57.self_attn.k_proj.weight filter=lfs diff=lfs merge=lfs -text
358
+ weights/decoder.layers.57.self_attn.out_proj.weight filter=lfs diff=lfs merge=lfs -text
359
+ weights/decoder.layers.57.self_attn.q_proj.weight filter=lfs diff=lfs merge=lfs -text
360
+ weights/decoder.layers.57.self_attn.v_proj.weight filter=lfs diff=lfs merge=lfs -text
361
+ weights/decoder.layers.58.fc1.weight filter=lfs diff=lfs merge=lfs -text
362
+ weights/decoder.layers.58.fc2.weight filter=lfs diff=lfs merge=lfs -text
363
+ weights/decoder.layers.58.self_attn.k_proj.weight filter=lfs diff=lfs merge=lfs -text
364
+ weights/decoder.layers.58.self_attn.out_proj.weight filter=lfs diff=lfs merge=lfs -text
365
+ weights/decoder.layers.58.self_attn.q_proj.weight filter=lfs diff=lfs merge=lfs -text
366
+ weights/decoder.layers.58.self_attn.v_proj.weight filter=lfs diff=lfs merge=lfs -text
367
+ weights/decoder.layers.59.fc1.weight filter=lfs diff=lfs merge=lfs -text
368
+ weights/decoder.layers.59.fc2.weight filter=lfs diff=lfs merge=lfs -text
369
+ weights/decoder.layers.59.self_attn.k_proj.weight filter=lfs diff=lfs merge=lfs -text
370
+ weights/decoder.layers.59.self_attn.out_proj.weight filter=lfs diff=lfs merge=lfs -text
371
+ weights/decoder.layers.59.self_attn.q_proj.weight filter=lfs diff=lfs merge=lfs -text
372
+ weights/decoder.layers.59.self_attn.v_proj.weight filter=lfs diff=lfs merge=lfs -text
373
+ weights/decoder.layers.6.fc1.weight filter=lfs diff=lfs merge=lfs -text
374
+ weights/decoder.layers.6.fc2.weight filter=lfs diff=lfs merge=lfs -text
375
+ weights/decoder.layers.6.self_attn.k_proj.weight filter=lfs diff=lfs merge=lfs -text
376
+ weights/decoder.layers.6.self_attn.out_proj.weight filter=lfs diff=lfs merge=lfs -text
377
+ weights/decoder.layers.6.self_attn.q_proj.weight filter=lfs diff=lfs merge=lfs -text
378
+ weights/decoder.layers.6.self_attn.v_proj.weight filter=lfs diff=lfs merge=lfs -text
379
+ weights/decoder.layers.60.fc1.weight filter=lfs diff=lfs merge=lfs -text
380
+ weights/decoder.layers.60.fc2.weight filter=lfs diff=lfs merge=lfs -text
381
+ weights/decoder.layers.60.self_attn.k_proj.weight filter=lfs diff=lfs merge=lfs -text
382
+ weights/decoder.layers.60.self_attn.out_proj.weight filter=lfs diff=lfs merge=lfs -text
383
+ weights/decoder.layers.60.self_attn.q_proj.weight filter=lfs diff=lfs merge=lfs -text
384
+ weights/decoder.layers.60.self_attn.v_proj.weight filter=lfs diff=lfs merge=lfs -text
385
+ weights/decoder.layers.61.fc1.weight filter=lfs diff=lfs merge=lfs -text
386
+ weights/decoder.layers.61.fc2.weight filter=lfs diff=lfs merge=lfs -text
387
+ weights/decoder.layers.61.self_attn.k_proj.weight filter=lfs diff=lfs merge=lfs -text
388
+ weights/decoder.layers.61.self_attn.out_proj.weight filter=lfs diff=lfs merge=lfs -text
389
+ weights/decoder.layers.61.self_attn.q_proj.weight filter=lfs diff=lfs merge=lfs -text
390
+ weights/decoder.layers.61.self_attn.v_proj.weight filter=lfs diff=lfs merge=lfs -text
391
+ weights/decoder.layers.62.fc1.weight filter=lfs diff=lfs merge=lfs -text
392
+ weights/decoder.layers.62.fc2.weight filter=lfs diff=lfs merge=lfs -text
393
+ weights/decoder.layers.62.self_attn.k_proj.weight filter=lfs diff=lfs merge=lfs -text
394
+ weights/decoder.layers.62.self_attn.out_proj.weight filter=lfs diff=lfs merge=lfs -text
395
+ weights/decoder.layers.62.self_attn.q_proj.weight filter=lfs diff=lfs merge=lfs -text
396
+ weights/decoder.layers.62.self_attn.v_proj.weight filter=lfs diff=lfs merge=lfs -text
397
+ weights/decoder.layers.63.fc1.weight filter=lfs diff=lfs merge=lfs -text
398
+ weights/decoder.layers.63.fc2.weight filter=lfs diff=lfs merge=lfs -text
399
+ weights/decoder.layers.63.self_attn.k_proj.weight filter=lfs diff=lfs merge=lfs -text
400
+ weights/decoder.layers.63.self_attn.out_proj.weight filter=lfs diff=lfs merge=lfs -text
401
+ weights/decoder.layers.63.self_attn.q_proj.weight filter=lfs diff=lfs merge=lfs -text
402
+ weights/decoder.layers.63.self_attn.v_proj.weight filter=lfs diff=lfs merge=lfs -text
403
+ weights/decoder.layers.64.fc1.weight filter=lfs diff=lfs merge=lfs -text
404
+ weights/decoder.layers.64.fc2.weight filter=lfs diff=lfs merge=lfs -text
405
+ weights/decoder.layers.64.self_attn.k_proj.weight filter=lfs diff=lfs merge=lfs -text
406
+ weights/decoder.layers.64.self_attn.out_proj.weight filter=lfs diff=lfs merge=lfs -text
407
+ weights/decoder.layers.64.self_attn.q_proj.weight filter=lfs diff=lfs merge=lfs -text
408
+ weights/decoder.layers.64.self_attn.v_proj.weight filter=lfs diff=lfs merge=lfs -text
409
+ weights/decoder.layers.65.fc1.weight filter=lfs diff=lfs merge=lfs -text
410
+ weights/decoder.layers.65.fc2.weight filter=lfs diff=lfs merge=lfs -text
411
+ weights/decoder.layers.65.self_attn.k_proj.weight filter=lfs diff=lfs merge=lfs -text
412
+ weights/decoder.layers.65.self_attn.out_proj.weight filter=lfs diff=lfs merge=lfs -text
413
+ weights/decoder.layers.65.self_attn.q_proj.weight filter=lfs diff=lfs merge=lfs -text
414
+ weights/decoder.layers.65.self_attn.v_proj.weight filter=lfs diff=lfs merge=lfs -text
415
+ weights/decoder.layers.66.fc1.weight filter=lfs diff=lfs merge=lfs -text
416
+ weights/decoder.layers.66.fc2.weight filter=lfs diff=lfs merge=lfs -text
417
+ weights/decoder.layers.66.self_attn.k_proj.weight filter=lfs diff=lfs merge=lfs -text
418
+ weights/decoder.layers.66.self_attn.out_proj.weight filter=lfs diff=lfs merge=lfs -text
419
+ weights/decoder.layers.66.self_attn.q_proj.weight filter=lfs diff=lfs merge=lfs -text
420
+ weights/decoder.layers.66.self_attn.v_proj.weight filter=lfs diff=lfs merge=lfs -text
421
+ weights/decoder.layers.67.fc1.weight filter=lfs diff=lfs merge=lfs -text
422
+ weights/decoder.layers.67.fc2.weight filter=lfs diff=lfs merge=lfs -text
423
+ weights/decoder.layers.67.self_attn.k_proj.weight filter=lfs diff=lfs merge=lfs -text
424
+ weights/decoder.layers.67.self_attn.out_proj.weight filter=lfs diff=lfs merge=lfs -text
425
+ weights/decoder.layers.67.self_attn.q_proj.weight filter=lfs diff=lfs merge=lfs -text
426
+ weights/decoder.layers.67.self_attn.v_proj.weight filter=lfs diff=lfs merge=lfs -text
427
+ weights/decoder.layers.68.fc1.weight filter=lfs diff=lfs merge=lfs -text
428
+ weights/decoder.layers.68.fc2.weight filter=lfs diff=lfs merge=lfs -text
429
+ weights/decoder.layers.68.self_attn.k_proj.weight filter=lfs diff=lfs merge=lfs -text
430
+ weights/decoder.layers.68.self_attn.out_proj.weight filter=lfs diff=lfs merge=lfs -text
431
+ weights/decoder.layers.68.self_attn.q_proj.weight filter=lfs diff=lfs merge=lfs -text
432
+ weights/decoder.layers.68.self_attn.v_proj.weight filter=lfs diff=lfs merge=lfs -text
433
+ weights/decoder.layers.69.fc1.weight filter=lfs diff=lfs merge=lfs -text
434
+ weights/decoder.layers.69.fc2.weight filter=lfs diff=lfs merge=lfs -text
435
+ weights/decoder.layers.69.self_attn.k_proj.weight filter=lfs diff=lfs merge=lfs -text
436
+ weights/decoder.layers.69.self_attn.out_proj.weight filter=lfs diff=lfs merge=lfs -text
437
+ weights/decoder.layers.69.self_attn.q_proj.weight filter=lfs diff=lfs merge=lfs -text
438
+ weights/decoder.layers.69.self_attn.v_proj.weight filter=lfs diff=lfs merge=lfs -text
439
+ weights/decoder.layers.7.fc1.weight filter=lfs diff=lfs merge=lfs -text
440
+ weights/decoder.layers.7.fc2.weight filter=lfs diff=lfs merge=lfs -text
441
+ weights/decoder.layers.7.self_attn.k_proj.weight filter=lfs diff=lfs merge=lfs -text
442
+ weights/decoder.layers.7.self_attn.out_proj.weight filter=lfs diff=lfs merge=lfs -text
443
+ weights/decoder.layers.7.self_attn.q_proj.weight filter=lfs diff=lfs merge=lfs -text
444
+ weights/decoder.layers.7.self_attn.v_proj.weight filter=lfs diff=lfs merge=lfs -text
445
+ weights/decoder.layers.70.fc1.weight filter=lfs diff=lfs merge=lfs -text
446
+ weights/decoder.layers.70.fc2.weight filter=lfs diff=lfs merge=lfs -text
447
+ weights/decoder.layers.70.self_attn.k_proj.weight filter=lfs diff=lfs merge=lfs -text
448
+ weights/decoder.layers.70.self_attn.out_proj.weight filter=lfs diff=lfs merge=lfs -text
449
+ weights/decoder.layers.70.self_attn.q_proj.weight filter=lfs diff=lfs merge=lfs -text
450
+ weights/decoder.layers.70.self_attn.v_proj.weight filter=lfs diff=lfs merge=lfs -text
451
+ weights/decoder.layers.71.fc1.weight filter=lfs diff=lfs merge=lfs -text
452
+ weights/decoder.layers.71.fc2.weight filter=lfs diff=lfs merge=lfs -text
453
+ weights/decoder.layers.71.self_attn.k_proj.weight filter=lfs diff=lfs merge=lfs -text
454
+ weights/decoder.layers.71.self_attn.out_proj.weight filter=lfs diff=lfs merge=lfs -text
455
+ weights/decoder.layers.71.self_attn.q_proj.weight filter=lfs diff=lfs merge=lfs -text
456
+ weights/decoder.layers.71.self_attn.v_proj.weight filter=lfs diff=lfs merge=lfs -text
457
+ weights/decoder.layers.72.fc1.weight filter=lfs diff=lfs merge=lfs -text
458
+ weights/decoder.layers.72.fc2.weight filter=lfs diff=lfs merge=lfs -text
459
+ weights/decoder.layers.72.self_attn.k_proj.weight filter=lfs diff=lfs merge=lfs -text
460
+ weights/decoder.layers.72.self_attn.out_proj.weight filter=lfs diff=lfs merge=lfs -text
461
+ weights/decoder.layers.72.self_attn.q_proj.weight filter=lfs diff=lfs merge=lfs -text
462
+ weights/decoder.layers.72.self_attn.v_proj.weight filter=lfs diff=lfs merge=lfs -text
463
+ weights/decoder.layers.73.fc1.weight filter=lfs diff=lfs merge=lfs -text
464
+ weights/decoder.layers.73.fc2.weight filter=lfs diff=lfs merge=lfs -text
465
+ weights/decoder.layers.73.self_attn.k_proj.weight filter=lfs diff=lfs merge=lfs -text
466
+ weights/decoder.layers.73.self_attn.out_proj.weight filter=lfs diff=lfs merge=lfs -text
467
+ weights/decoder.layers.73.self_attn.q_proj.weight filter=lfs diff=lfs merge=lfs -text
468
+ weights/decoder.layers.73.self_attn.v_proj.weight filter=lfs diff=lfs merge=lfs -text
469
+ weights/decoder.layers.74.fc1.weight filter=lfs diff=lfs merge=lfs -text
470
+ weights/decoder.layers.74.fc2.weight filter=lfs diff=lfs merge=lfs -text
471
+ weights/decoder.layers.74.self_attn.k_proj.weight filter=lfs diff=lfs merge=lfs -text
472
+ weights/decoder.layers.74.self_attn.out_proj.weight filter=lfs diff=lfs merge=lfs -text
473
+ weights/decoder.layers.74.self_attn.q_proj.weight filter=lfs diff=lfs merge=lfs -text
474
+ weights/decoder.layers.74.self_attn.v_proj.weight filter=lfs diff=lfs merge=lfs -text
475
+ weights/decoder.layers.75.fc1.weight filter=lfs diff=lfs merge=lfs -text
476
+ weights/decoder.layers.75.fc2.weight filter=lfs diff=lfs merge=lfs -text
477
+ weights/decoder.layers.75.self_attn.k_proj.weight filter=lfs diff=lfs merge=lfs -text
478
+ weights/decoder.layers.75.self_attn.out_proj.weight filter=lfs diff=lfs merge=lfs -text
479
+ weights/decoder.layers.75.self_attn.q_proj.weight filter=lfs diff=lfs merge=lfs -text
480
+ weights/decoder.layers.75.self_attn.v_proj.weight filter=lfs diff=lfs merge=lfs -text
481
+ weights/decoder.layers.76.fc1.weight filter=lfs diff=lfs merge=lfs -text
482
+ weights/decoder.layers.76.fc2.weight filter=lfs diff=lfs merge=lfs -text
483
+ weights/decoder.layers.76.self_attn.k_proj.weight filter=lfs diff=lfs merge=lfs -text
484
+ weights/decoder.layers.76.self_attn.out_proj.weight filter=lfs diff=lfs merge=lfs -text
485
+ weights/decoder.layers.76.self_attn.q_proj.weight filter=lfs diff=lfs merge=lfs -text
486
+ weights/decoder.layers.76.self_attn.v_proj.weight filter=lfs diff=lfs merge=lfs -text
487
+ weights/decoder.layers.77.fc1.weight filter=lfs diff=lfs merge=lfs -text
488
+ weights/decoder.layers.77.fc2.weight filter=lfs diff=lfs merge=lfs -text
489
+ weights/decoder.layers.77.self_attn.k_proj.weight filter=lfs diff=lfs merge=lfs -text
490
+ weights/decoder.layers.77.self_attn.out_proj.weight filter=lfs diff=lfs merge=lfs -text
491
+ weights/decoder.layers.77.self_attn.q_proj.weight filter=lfs diff=lfs merge=lfs -text
492
+ weights/decoder.layers.77.self_attn.v_proj.weight filter=lfs diff=lfs merge=lfs -text
493
+ weights/decoder.layers.78.fc1.weight filter=lfs diff=lfs merge=lfs -text
494
+ weights/decoder.layers.78.fc2.weight filter=lfs diff=lfs merge=lfs -text
495
+ weights/decoder.layers.78.self_attn.k_proj.weight filter=lfs diff=lfs merge=lfs -text
496
+ weights/decoder.layers.78.self_attn.out_proj.weight filter=lfs diff=lfs merge=lfs -text
497
+ weights/decoder.layers.78.self_attn.q_proj.weight filter=lfs diff=lfs merge=lfs -text
498
+ weights/decoder.layers.78.self_attn.v_proj.weight filter=lfs diff=lfs merge=lfs -text
499
+ weights/decoder.layers.79.fc1.weight filter=lfs diff=lfs merge=lfs -text
500
+ weights/decoder.layers.79.fc2.weight filter=lfs diff=lfs merge=lfs -text
501
+ weights/decoder.layers.79.self_attn.k_proj.weight filter=lfs diff=lfs merge=lfs -text
502
+ weights/decoder.layers.79.self_attn.out_proj.weight filter=lfs diff=lfs merge=lfs -text
503
+ weights/decoder.layers.79.self_attn.q_proj.weight filter=lfs diff=lfs merge=lfs -text
504
+ weights/decoder.layers.79.self_attn.v_proj.weight filter=lfs diff=lfs merge=lfs -text
505
+ weights/decoder.layers.8.fc1.weight filter=lfs diff=lfs merge=lfs -text
506
+ weights/decoder.layers.8.fc2.weight filter=lfs diff=lfs merge=lfs -text
507
+ weights/decoder.layers.8.self_attn.k_proj.weight filter=lfs diff=lfs merge=lfs -text
508
+ weights/decoder.layers.8.self_attn.out_proj.weight filter=lfs diff=lfs merge=lfs -text
509
+ weights/decoder.layers.8.self_attn.q_proj.weight filter=lfs diff=lfs merge=lfs -text
510
+ weights/decoder.layers.8.self_attn.v_proj.weight filter=lfs diff=lfs merge=lfs -text
511
+ weights/decoder.layers.80.fc1.weight filter=lfs diff=lfs merge=lfs -text
512
+ weights/decoder.layers.80.fc2.weight filter=lfs diff=lfs merge=lfs -text
513
+ weights/decoder.layers.80.self_attn.k_proj.weight filter=lfs diff=lfs merge=lfs -text
514
+ weights/decoder.layers.80.self_attn.out_proj.weight filter=lfs diff=lfs merge=lfs -text
515
+ weights/decoder.layers.80.self_attn.q_proj.weight filter=lfs diff=lfs merge=lfs -text
516
+ weights/decoder.layers.80.self_attn.v_proj.weight filter=lfs diff=lfs merge=lfs -text
517
+ weights/decoder.layers.81.fc1.weight filter=lfs diff=lfs merge=lfs -text
518
+ weights/decoder.layers.81.fc2.weight filter=lfs diff=lfs merge=lfs -text
519
+ weights/decoder.layers.81.self_attn.k_proj.weight filter=lfs diff=lfs merge=lfs -text
520
+ weights/decoder.layers.81.self_attn.out_proj.weight filter=lfs diff=lfs merge=lfs -text
521
+ weights/decoder.layers.81.self_attn.q_proj.weight filter=lfs diff=lfs merge=lfs -text
522
+ weights/decoder.layers.81.self_attn.v_proj.weight filter=lfs diff=lfs merge=lfs -text
523
+ weights/decoder.layers.82.fc1.weight filter=lfs diff=lfs merge=lfs -text
524
+ weights/decoder.layers.82.fc2.weight filter=lfs diff=lfs merge=lfs -text
525
+ weights/decoder.layers.82.self_attn.k_proj.weight filter=lfs diff=lfs merge=lfs -text
526
+ weights/decoder.layers.82.self_attn.out_proj.weight filter=lfs diff=lfs merge=lfs -text
527
+ weights/decoder.layers.82.self_attn.q_proj.weight filter=lfs diff=lfs merge=lfs -text
528
+ weights/decoder.layers.82.self_attn.v_proj.weight filter=lfs diff=lfs merge=lfs -text
529
+ weights/decoder.layers.83.fc1.weight filter=lfs diff=lfs merge=lfs -text
530
+ weights/decoder.layers.83.fc2.weight filter=lfs diff=lfs merge=lfs -text
531
+ weights/decoder.layers.83.self_attn.k_proj.weight filter=lfs diff=lfs merge=lfs -text
532
+ weights/decoder.layers.83.self_attn.out_proj.weight filter=lfs diff=lfs merge=lfs -text
533
+ weights/decoder.layers.83.self_attn.q_proj.weight filter=lfs diff=lfs merge=lfs -text
534
+ weights/decoder.layers.83.self_attn.v_proj.weight filter=lfs diff=lfs merge=lfs -text
535
+ weights/decoder.layers.84.fc1.weight filter=lfs diff=lfs merge=lfs -text
536
+ weights/decoder.layers.84.fc2.weight filter=lfs diff=lfs merge=lfs -text
537
+ weights/decoder.layers.84.self_attn.k_proj.weight filter=lfs diff=lfs merge=lfs -text
538
+ weights/decoder.layers.84.self_attn.out_proj.weight filter=lfs diff=lfs merge=lfs -text
539
+ weights/decoder.layers.84.self_attn.q_proj.weight filter=lfs diff=lfs merge=lfs -text
540
+ weights/decoder.layers.84.self_attn.v_proj.weight filter=lfs diff=lfs merge=lfs -text
541
+ weights/decoder.layers.85.fc1.weight filter=lfs diff=lfs merge=lfs -text
542
+ weights/decoder.layers.85.fc2.weight filter=lfs diff=lfs merge=lfs -text
543
+ weights/decoder.layers.85.self_attn.k_proj.weight filter=lfs diff=lfs merge=lfs -text
544
+ weights/decoder.layers.85.self_attn.out_proj.weight filter=lfs diff=lfs merge=lfs -text
545
+ weights/decoder.layers.85.self_attn.q_proj.weight filter=lfs diff=lfs merge=lfs -text
546
+ weights/decoder.layers.85.self_attn.v_proj.weight filter=lfs diff=lfs merge=lfs -text
547
+ weights/decoder.layers.86.fc1.weight filter=lfs diff=lfs merge=lfs -text
548
+ weights/decoder.layers.86.fc2.weight filter=lfs diff=lfs merge=lfs -text
549
+ weights/decoder.layers.86.self_attn.k_proj.weight filter=lfs diff=lfs merge=lfs -text
550
+ weights/decoder.layers.86.self_attn.out_proj.weight filter=lfs diff=lfs merge=lfs -text
551
+ weights/decoder.layers.86.self_attn.q_proj.weight filter=lfs diff=lfs merge=lfs -text
552
+ weights/decoder.layers.86.self_attn.v_proj.weight filter=lfs diff=lfs merge=lfs -text
553
+ weights/decoder.layers.87.fc1.weight filter=lfs diff=lfs merge=lfs -text
554
+ weights/decoder.layers.87.fc2.weight filter=lfs diff=lfs merge=lfs -text
555
+ weights/decoder.layers.87.self_attn.k_proj.weight filter=lfs diff=lfs merge=lfs -text
556
+ weights/decoder.layers.87.self_attn.out_proj.weight filter=lfs diff=lfs merge=lfs -text
557
+ weights/decoder.layers.87.self_attn.q_proj.weight filter=lfs diff=lfs merge=lfs -text
558
+ weights/decoder.layers.87.self_attn.v_proj.weight filter=lfs diff=lfs merge=lfs -text
559
+ weights/decoder.layers.88.fc1.weight filter=lfs diff=lfs merge=lfs -text
560
+ weights/decoder.layers.88.fc2.weight filter=lfs diff=lfs merge=lfs -text
561
+ weights/decoder.layers.88.self_attn.k_proj.weight filter=lfs diff=lfs merge=lfs -text
562
+ weights/decoder.layers.88.self_attn.out_proj.weight filter=lfs diff=lfs merge=lfs -text
563
+ weights/decoder.layers.88.self_attn.q_proj.weight filter=lfs diff=lfs merge=lfs -text
564
+ weights/decoder.layers.88.self_attn.v_proj.weight filter=lfs diff=lfs merge=lfs -text
565
+ weights/decoder.layers.89.fc1.weight filter=lfs diff=lfs merge=lfs -text
566
+ weights/decoder.layers.89.fc2.weight filter=lfs diff=lfs merge=lfs -text
567
+ weights/decoder.layers.89.self_attn.k_proj.weight filter=lfs diff=lfs merge=lfs -text
568
+ weights/decoder.layers.89.self_attn.out_proj.weight filter=lfs diff=lfs merge=lfs -text
569
+ weights/decoder.layers.89.self_attn.q_proj.weight filter=lfs diff=lfs merge=lfs -text
570
+ weights/decoder.layers.89.self_attn.v_proj.weight filter=lfs diff=lfs merge=lfs -text
571
+ weights/decoder.layers.9.fc1.weight filter=lfs diff=lfs merge=lfs -text
572
+ weights/decoder.layers.9.fc2.weight filter=lfs diff=lfs merge=lfs -text
573
+ weights/decoder.layers.9.self_attn.k_proj.weight filter=lfs diff=lfs merge=lfs -text
574
+ weights/decoder.layers.9.self_attn.out_proj.weight filter=lfs diff=lfs merge=lfs -text
575
+ weights/decoder.layers.9.self_attn.q_proj.weight filter=lfs diff=lfs merge=lfs -text
576
+ weights/decoder.layers.9.self_attn.v_proj.weight filter=lfs diff=lfs merge=lfs -text
577
+ weights/decoder.layers.90.fc1.weight filter=lfs diff=lfs merge=lfs -text
578
+ weights/decoder.layers.90.fc2.weight filter=lfs diff=lfs merge=lfs -text
579
+ weights/decoder.layers.90.self_attn.k_proj.weight filter=lfs diff=lfs merge=lfs -text
580
+ weights/decoder.layers.90.self_attn.out_proj.weight filter=lfs diff=lfs merge=lfs -text
581
+ weights/decoder.layers.90.self_attn.q_proj.weight filter=lfs diff=lfs merge=lfs -text
582
+ weights/decoder.layers.90.self_attn.v_proj.weight filter=lfs diff=lfs merge=lfs -text
583
+ weights/decoder.layers.91.fc1.weight filter=lfs diff=lfs merge=lfs -text
584
+ weights/decoder.layers.91.fc2.weight filter=lfs diff=lfs merge=lfs -text
585
+ weights/decoder.layers.91.self_attn.k_proj.weight filter=lfs diff=lfs merge=lfs -text
586
+ weights/decoder.layers.91.self_attn.out_proj.weight filter=lfs diff=lfs merge=lfs -text
587
+ weights/decoder.layers.91.self_attn.q_proj.weight filter=lfs diff=lfs merge=lfs -text
588
+ weights/decoder.layers.91.self_attn.v_proj.weight filter=lfs diff=lfs merge=lfs -text
589
+ weights/decoder.layers.92.fc1.weight filter=lfs diff=lfs merge=lfs -text
590
+ weights/decoder.layers.92.fc2.weight filter=lfs diff=lfs merge=lfs -text
591
+ weights/decoder.layers.92.self_attn.k_proj.weight filter=lfs diff=lfs merge=lfs -text
592
+ weights/decoder.layers.92.self_attn.out_proj.weight filter=lfs diff=lfs merge=lfs -text
593
+ weights/decoder.layers.92.self_attn.q_proj.weight filter=lfs diff=lfs merge=lfs -text
594
+ weights/decoder.layers.92.self_attn.v_proj.weight filter=lfs diff=lfs merge=lfs -text
595
+ weights/decoder.layers.93.fc1.weight filter=lfs diff=lfs merge=lfs -text
596
+ weights/decoder.layers.93.fc2.weight filter=lfs diff=lfs merge=lfs -text
597
+ weights/decoder.layers.93.self_attn.k_proj.weight filter=lfs diff=lfs merge=lfs -text
598
+ weights/decoder.layers.93.self_attn.out_proj.weight filter=lfs diff=lfs merge=lfs -text
599
+ weights/decoder.layers.93.self_attn.q_proj.weight filter=lfs diff=lfs merge=lfs -text
600
+ weights/decoder.layers.93.self_attn.v_proj.weight filter=lfs diff=lfs merge=lfs -text
601
+ weights/decoder.layers.94.fc1.weight filter=lfs diff=lfs merge=lfs -text
602
+ weights/decoder.layers.94.fc2.weight filter=lfs diff=lfs merge=lfs -text
603
+ weights/decoder.layers.94.self_attn.k_proj.weight filter=lfs diff=lfs merge=lfs -text
604
+ weights/decoder.layers.94.self_attn.out_proj.weight filter=lfs diff=lfs merge=lfs -text
605
+ weights/decoder.layers.94.self_attn.q_proj.weight filter=lfs diff=lfs merge=lfs -text
606
+ weights/decoder.layers.94.self_attn.v_proj.weight filter=lfs diff=lfs merge=lfs -text
607
+ weights/decoder.layers.95.fc1.weight filter=lfs diff=lfs merge=lfs -text
608
+ weights/decoder.layers.95.fc2.weight filter=lfs diff=lfs merge=lfs -text
609
+ weights/decoder.layers.95.self_attn.k_proj.weight filter=lfs diff=lfs merge=lfs -text
610
+ weights/decoder.layers.95.self_attn.out_proj.weight filter=lfs diff=lfs merge=lfs -text
611
+ weights/decoder.layers.95.self_attn.q_proj.weight filter=lfs diff=lfs merge=lfs -text
612
+ weights/decoder.layers.95.self_attn.v_proj.weight filter=lfs diff=lfs merge=lfs -text
weights/decoder.embed_positions.weight ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1333b170c2ebe95489f734d987e666c3ca47d33934af211a63701c90553b17d7
3
+ size 50380928
weights/decoder.embed_tokens.weight ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3455388c792c86f5921803e12b7f39a46e1e0ca6aca6f76d9ef662473486eab9
3
+ size 1235484800
weights/decoder.layer_norm.bias ADDED
Binary file (24.7 kB). View file
 
weights/decoder.layer_norm.weight ADDED
Binary file (24.7 kB). View file
 
weights/decoder.layers.0.fc1.bias ADDED
Binary file (98.4 kB). View file
 
weights/decoder.layers.0.fc1.weight ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fa07921ea62baf81f6664f03ad51f4cbf3668caaaa79e0bf6d076ca3ef016f64
3
+ size 1207959680
weights/decoder.layers.0.fc2.bias ADDED
Binary file (24.7 kB). View file
 
weights/decoder.layers.0.fc2.weight ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:22d5177a4fccae4c2acaa5eb6f05b63d6cab3e69f9db44dbfe0612343002a32f
3
+ size 1207959680
weights/decoder.layers.0.final_layer_norm.bias ADDED
Binary file (24.7 kB). View file
 
weights/decoder.layers.0.final_layer_norm.weight ADDED
Binary file (24.7 kB). View file
 
weights/decoder.layers.0.self_attn.k_proj.bias ADDED
Binary file (24.7 kB). View file
 
weights/decoder.layers.0.self_attn.k_proj.weight ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:409f705d4dfdc60bba51453010214f2a12bb221d7142543cac893090ee65cf5b
3
+ size 301990016
weights/decoder.layers.0.self_attn.out_proj.bias ADDED
Binary file (24.7 kB). View file
 
weights/decoder.layers.0.self_attn.out_proj.weight ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b30be210eda5110041793ff63c5bce9e06e509cbd72c5fc793f3b4c211a4bbb2
3
+ size 301990016
weights/decoder.layers.0.self_attn.q_proj.bias ADDED
Binary file (24.7 kB). View file
 
weights/decoder.layers.0.self_attn.q_proj.weight ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:78115bb8deb547c7afde2d241666744d65189d716f8e6b9cea20f5d60939e16f
3
+ size 301990016
weights/decoder.layers.0.self_attn.v_proj.bias ADDED
Binary file (24.7 kB). View file
 
weights/decoder.layers.0.self_attn.v_proj.weight ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f34fdba6b6f573d84132fea7f3fd54a56671c56a595259c2a21f28ce4e268139
3
+ size 301990016
weights/decoder.layers.0.self_attn_layer_norm.bias ADDED
Binary file (24.7 kB). View file
 
weights/decoder.layers.0.self_attn_layer_norm.weight ADDED
Binary file (24.7 kB). View file
 
weights/decoder.layers.1.fc1.bias ADDED
Binary file (98.4 kB). View file
 
weights/decoder.layers.1.fc1.weight ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ebe8dbb24aad421922cc1d833e85f30dfa14e74ef2e25cc045a3a67eb60169b9
3
+ size 1207959680
weights/decoder.layers.1.fc2.bias ADDED
Binary file (24.7 kB). View file
 
weights/decoder.layers.1.fc2.weight ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:df2afe4a55d289497b4de26c653289cb6b52b2d0c6f86c036b6053d8a060dc47
3
+ size 1207959680
weights/decoder.layers.1.final_layer_norm.bias ADDED
Binary file (24.7 kB). View file
 
weights/decoder.layers.1.final_layer_norm.weight ADDED
Binary file (24.7 kB). View file
 
weights/decoder.layers.1.self_attn.k_proj.bias ADDED
Binary file (24.7 kB). View file
 
weights/decoder.layers.1.self_attn.k_proj.weight ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f1b076a127ce085e0a6efeff9a1e1c2d293b5ad80b58287bbbadd09fb27e1874
3
+ size 301990016
weights/decoder.layers.1.self_attn.out_proj.bias ADDED
Binary file (24.7 kB). View file
 
weights/decoder.layers.1.self_attn.out_proj.weight ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a64306d38e8bcc72001bb07de5d9f57a82f0477cc63bd0c2d621d43a174a8787
3
+ size 301990016
weights/decoder.layers.1.self_attn.q_proj.bias ADDED
Binary file (24.7 kB). View file
 
weights/decoder.layers.1.self_attn.q_proj.weight ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2dcd34581f1bbb6e2c34265ad729f19fb8a1e535c5109e406f101e402730fd77
3
+ size 301990016
weights/decoder.layers.1.self_attn.v_proj.bias ADDED
Binary file (24.7 kB). View file
 
weights/decoder.layers.1.self_attn.v_proj.weight ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:922162d40e00d3d3578722f77014eb45f0a5a011724eb6d736482b082fd4fe4a
3
+ size 301990016
weights/decoder.layers.1.self_attn_layer_norm.bias ADDED
Binary file (24.7 kB). View file
 
weights/decoder.layers.1.self_attn_layer_norm.weight ADDED
Binary file (24.7 kB). View file
 
weights/decoder.layers.10.fc1.bias ADDED
Binary file (98.4 kB). View file
 
weights/decoder.layers.10.fc1.weight ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b38908b772ba11e62be09e8fcf0d0eaa0d8d22abd71b4e160057f46e109ff601
3
+ size 1207959680
weights/decoder.layers.10.fc2.bias ADDED
Binary file (24.7 kB). View file
 
weights/decoder.layers.10.fc2.weight ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:60126258486fca152e124890d4ded8c34bf0e63047e14791ec9253658c9f6012
3
+ size 1207959680
weights/decoder.layers.10.final_layer_norm.bias ADDED
Binary file (24.7 kB). View file
 
weights/decoder.layers.10.final_layer_norm.weight ADDED
Binary file (24.7 kB). View file
 
weights/decoder.layers.10.self_attn.k_proj.bias ADDED
Binary file (24.7 kB). View file
 
weights/decoder.layers.10.self_attn.k_proj.weight ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:890471f9c84b9728bda496c5e1d5e8296872fea5dc26b7d049df001d50cd6580
3
+ size 301990016
weights/decoder.layers.10.self_attn.out_proj.bias ADDED
Binary file (24.7 kB). View file
 
weights/decoder.layers.10.self_attn.out_proj.weight ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4325c9342c8e8d79df9ba917006690a75a23b71b043aab66f11bf1a3511e0aba
3
+ size 301990016
weights/decoder.layers.10.self_attn.q_proj.bias ADDED
Binary file (24.7 kB). View file
 
weights/decoder.layers.10.self_attn.q_proj.weight ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:de92359ccadd02a93624b174f8c4dc49086b1677a9cd26fa056f7a94bff531c9
3
+ size 301990016
weights/decoder.layers.10.self_attn.v_proj.bias ADDED
Binary file (24.7 kB). View file