| { |
| "layer_types": { |
| "transformer": 391 |
| }, |
| "parameter_counts": { |
| "transformer.time_embed.time_mlp.0.weight": 262144, |
| "transformer.time_embed.time_mlp.0.bias": 1024, |
| "transformer.time_embed.time_mlp.2.weight": 1048576, |
| "transformer.time_embed.time_mlp.2.bias": 1024, |
| "transformer.text_embed.text_embed.weight": 254600, |
| "transformer.input_embed.proj.weight": 307200, |
| "transformer.input_embed.proj.bias": 1024, |
| "transformer.input_embed.conv_pos_embed.conv1d.0.weight": 2031616, |
| "transformer.input_embed.conv_pos_embed.conv1d.0.bias": 1024, |
| "transformer.input_embed.conv_pos_embed.conv1d.2.weight": 2031616, |
| "transformer.input_embed.conv_pos_embed.conv1d.2.bias": 1024, |
| "transformer.layers.0.1.g": 1024, |
| "transformer.layers.0.2.to_q.weight": 1048576, |
| "transformer.layers.0.2.to_q.bias": 1024, |
| "transformer.layers.0.2.to_k.weight": 1048576, |
| "transformer.layers.0.2.to_k.bias": 1024, |
| "transformer.layers.0.2.to_v.weight": 1048576, |
| "transformer.layers.0.2.to_v.bias": 1024, |
| "transformer.layers.0.2.to_out.0.weight": 1048576, |
| "transformer.layers.0.2.to_out.0.bias": 1024, |
| "transformer.layers.0.3.g": 1024, |
| "transformer.layers.0.4.ff.0.0.weight": 4194304, |
| "transformer.layers.0.4.ff.0.0.bias": 4096, |
| "transformer.layers.0.4.ff.2.weight": 4194304, |
| "transformer.layers.0.4.ff.2.bias": 1024, |
| "transformer.layers.1.1.g": 1024, |
| "transformer.layers.1.2.to_q.weight": 1048576, |
| "transformer.layers.1.2.to_q.bias": 1024, |
| "transformer.layers.1.2.to_k.weight": 1048576, |
| "transformer.layers.1.2.to_k.bias": 1024, |
| "transformer.layers.1.2.to_v.weight": 1048576, |
| "transformer.layers.1.2.to_v.bias": 1024, |
| "transformer.layers.1.2.to_out.0.weight": 1048576, |
| "transformer.layers.1.2.to_out.0.bias": 1024, |
| "transformer.layers.1.3.g": 1024, |
| "transformer.layers.1.4.ff.0.0.weight": 4194304, |
| "transformer.layers.1.4.ff.0.0.bias": 4096, |
| "transformer.layers.1.4.ff.2.weight": 4194304, |
| "transformer.layers.1.4.ff.2.bias": 1024, |
| "transformer.layers.2.1.g": 1024, |
| "transformer.layers.2.2.to_q.weight": 1048576, |
| "transformer.layers.2.2.to_q.bias": 1024, |
| "transformer.layers.2.2.to_k.weight": 1048576, |
| "transformer.layers.2.2.to_k.bias": 1024, |
| "transformer.layers.2.2.to_v.weight": 1048576, |
| "transformer.layers.2.2.to_v.bias": 1024, |
| "transformer.layers.2.2.to_out.0.weight": 1048576, |
| "transformer.layers.2.2.to_out.0.bias": 1024, |
| "transformer.layers.2.3.g": 1024, |
| "transformer.layers.2.4.ff.0.0.weight": 4194304, |
| "transformer.layers.2.4.ff.0.0.bias": 4096, |
| "transformer.layers.2.4.ff.2.weight": 4194304, |
| "transformer.layers.2.4.ff.2.bias": 1024, |
| "transformer.layers.3.1.g": 1024, |
| "transformer.layers.3.2.to_q.weight": 1048576, |
| "transformer.layers.3.2.to_q.bias": 1024, |
| "transformer.layers.3.2.to_k.weight": 1048576, |
| "transformer.layers.3.2.to_k.bias": 1024, |
| "transformer.layers.3.2.to_v.weight": 1048576, |
| "transformer.layers.3.2.to_v.bias": 1024, |
| "transformer.layers.3.2.to_out.0.weight": 1048576, |
| "transformer.layers.3.2.to_out.0.bias": 1024, |
| "transformer.layers.3.3.g": 1024, |
| "transformer.layers.3.4.ff.0.0.weight": 4194304, |
| "transformer.layers.3.4.ff.0.0.bias": 4096, |
| "transformer.layers.3.4.ff.2.weight": 4194304, |
| "transformer.layers.3.4.ff.2.bias": 1024, |
| "transformer.layers.4.1.g": 1024, |
| "transformer.layers.4.2.to_q.weight": 1048576, |
| "transformer.layers.4.2.to_q.bias": 1024, |
| "transformer.layers.4.2.to_k.weight": 1048576, |
| "transformer.layers.4.2.to_k.bias": 1024, |
| "transformer.layers.4.2.to_v.weight": 1048576, |
| "transformer.layers.4.2.to_v.bias": 1024, |
| "transformer.layers.4.2.to_out.0.weight": 1048576, |
| "transformer.layers.4.2.to_out.0.bias": 1024, |
| "transformer.layers.4.3.g": 1024, |
| "transformer.layers.4.4.ff.0.0.weight": 4194304, |
| "transformer.layers.4.4.ff.0.0.bias": 4096, |
| "transformer.layers.4.4.ff.2.weight": 4194304, |
| "transformer.layers.4.4.ff.2.bias": 1024, |
| "transformer.layers.5.1.g": 1024, |
| "transformer.layers.5.2.to_q.weight": 1048576, |
| "transformer.layers.5.2.to_q.bias": 1024, |
| "transformer.layers.5.2.to_k.weight": 1048576, |
| "transformer.layers.5.2.to_k.bias": 1024, |
| "transformer.layers.5.2.to_v.weight": 1048576, |
| "transformer.layers.5.2.to_v.bias": 1024, |
| "transformer.layers.5.2.to_out.0.weight": 1048576, |
| "transformer.layers.5.2.to_out.0.bias": 1024, |
| "transformer.layers.5.3.g": 1024, |
| "transformer.layers.5.4.ff.0.0.weight": 4194304, |
| "transformer.layers.5.4.ff.0.0.bias": 4096, |
| "transformer.layers.5.4.ff.2.weight": 4194304, |
| "transformer.layers.5.4.ff.2.bias": 1024, |
| "transformer.layers.6.1.g": 1024, |
| "transformer.layers.6.2.to_q.weight": 1048576, |
| "transformer.layers.6.2.to_q.bias": 1024, |
| "transformer.layers.6.2.to_k.weight": 1048576, |
| "transformer.layers.6.2.to_k.bias": 1024, |
| "transformer.layers.6.2.to_v.weight": 1048576, |
| "transformer.layers.6.2.to_v.bias": 1024, |
| "transformer.layers.6.2.to_out.0.weight": 1048576, |
| "transformer.layers.6.2.to_out.0.bias": 1024, |
| "transformer.layers.6.3.g": 1024, |
| "transformer.layers.6.4.ff.0.0.weight": 4194304, |
| "transformer.layers.6.4.ff.0.0.bias": 4096, |
| "transformer.layers.6.4.ff.2.weight": 4194304, |
| "transformer.layers.6.4.ff.2.bias": 1024, |
| "transformer.layers.7.1.g": 1024, |
| "transformer.layers.7.2.to_q.weight": 1048576, |
| "transformer.layers.7.2.to_q.bias": 1024, |
| "transformer.layers.7.2.to_k.weight": 1048576, |
| "transformer.layers.7.2.to_k.bias": 1024, |
| "transformer.layers.7.2.to_v.weight": 1048576, |
| "transformer.layers.7.2.to_v.bias": 1024, |
| "transformer.layers.7.2.to_out.0.weight": 1048576, |
| "transformer.layers.7.2.to_out.0.bias": 1024, |
| "transformer.layers.7.3.g": 1024, |
| "transformer.layers.7.4.ff.0.0.weight": 4194304, |
| "transformer.layers.7.4.ff.0.0.bias": 4096, |
| "transformer.layers.7.4.ff.2.weight": 4194304, |
| "transformer.layers.7.4.ff.2.bias": 1024, |
| "transformer.layers.8.1.g": 1024, |
| "transformer.layers.8.2.to_q.weight": 1048576, |
| "transformer.layers.8.2.to_q.bias": 1024, |
| "transformer.layers.8.2.to_k.weight": 1048576, |
| "transformer.layers.8.2.to_k.bias": 1024, |
| "transformer.layers.8.2.to_v.weight": 1048576, |
| "transformer.layers.8.2.to_v.bias": 1024, |
| "transformer.layers.8.2.to_out.0.weight": 1048576, |
| "transformer.layers.8.2.to_out.0.bias": 1024, |
| "transformer.layers.8.3.g": 1024, |
| "transformer.layers.8.4.ff.0.0.weight": 4194304, |
| "transformer.layers.8.4.ff.0.0.bias": 4096, |
| "transformer.layers.8.4.ff.2.weight": 4194304, |
| "transformer.layers.8.4.ff.2.bias": 1024, |
| "transformer.layers.9.1.g": 1024, |
| "transformer.layers.9.2.to_q.weight": 1048576, |
| "transformer.layers.9.2.to_q.bias": 1024, |
| "transformer.layers.9.2.to_k.weight": 1048576, |
| "transformer.layers.9.2.to_k.bias": 1024, |
| "transformer.layers.9.2.to_v.weight": 1048576, |
| "transformer.layers.9.2.to_v.bias": 1024, |
| "transformer.layers.9.2.to_out.0.weight": 1048576, |
| "transformer.layers.9.2.to_out.0.bias": 1024, |
| "transformer.layers.9.3.g": 1024, |
| "transformer.layers.9.4.ff.0.0.weight": 4194304, |
| "transformer.layers.9.4.ff.0.0.bias": 4096, |
| "transformer.layers.9.4.ff.2.weight": 4194304, |
| "transformer.layers.9.4.ff.2.bias": 1024, |
| "transformer.layers.10.1.g": 1024, |
| "transformer.layers.10.2.to_q.weight": 1048576, |
| "transformer.layers.10.2.to_q.bias": 1024, |
| "transformer.layers.10.2.to_k.weight": 1048576, |
| "transformer.layers.10.2.to_k.bias": 1024, |
| "transformer.layers.10.2.to_v.weight": 1048576, |
| "transformer.layers.10.2.to_v.bias": 1024, |
| "transformer.layers.10.2.to_out.0.weight": 1048576, |
| "transformer.layers.10.2.to_out.0.bias": 1024, |
| "transformer.layers.10.3.g": 1024, |
| "transformer.layers.10.4.ff.0.0.weight": 4194304, |
| "transformer.layers.10.4.ff.0.0.bias": 4096, |
| "transformer.layers.10.4.ff.2.weight": 4194304, |
| "transformer.layers.10.4.ff.2.bias": 1024, |
| "transformer.layers.11.1.g": 1024, |
| "transformer.layers.11.2.to_q.weight": 1048576, |
| "transformer.layers.11.2.to_q.bias": 1024, |
| "transformer.layers.11.2.to_k.weight": 1048576, |
| "transformer.layers.11.2.to_k.bias": 1024, |
| "transformer.layers.11.2.to_v.weight": 1048576, |
| "transformer.layers.11.2.to_v.bias": 1024, |
| "transformer.layers.11.2.to_out.0.weight": 1048576, |
| "transformer.layers.11.2.to_out.0.bias": 1024, |
| "transformer.layers.11.3.g": 1024, |
| "transformer.layers.11.4.ff.0.0.weight": 4194304, |
| "transformer.layers.11.4.ff.0.0.bias": 4096, |
| "transformer.layers.11.4.ff.2.weight": 4194304, |
| "transformer.layers.11.4.ff.2.bias": 1024, |
| "transformer.layers.12.1.g": 1024, |
| "transformer.layers.12.2.to_q.weight": 1048576, |
| "transformer.layers.12.2.to_q.bias": 1024, |
| "transformer.layers.12.2.to_k.weight": 1048576, |
| "transformer.layers.12.2.to_k.bias": 1024, |
| "transformer.layers.12.2.to_v.weight": 1048576, |
| "transformer.layers.12.2.to_v.bias": 1024, |
| "transformer.layers.12.2.to_out.0.weight": 1048576, |
| "transformer.layers.12.2.to_out.0.bias": 1024, |
| "transformer.layers.12.3.g": 1024, |
| "transformer.layers.12.4.ff.0.0.weight": 4194304, |
| "transformer.layers.12.4.ff.0.0.bias": 4096, |
| "transformer.layers.12.4.ff.2.weight": 4194304, |
| "transformer.layers.12.4.ff.2.bias": 1024, |
| "transformer.layers.13.0.weight": 2097152, |
| "transformer.layers.13.1.g": 1024, |
| "transformer.layers.13.2.to_q.weight": 1048576, |
| "transformer.layers.13.2.to_q.bias": 1024, |
| "transformer.layers.13.2.to_k.weight": 1048576, |
| "transformer.layers.13.2.to_k.bias": 1024, |
| "transformer.layers.13.2.to_v.weight": 1048576, |
| "transformer.layers.13.2.to_v.bias": 1024, |
| "transformer.layers.13.2.to_out.0.weight": 1048576, |
| "transformer.layers.13.2.to_out.0.bias": 1024, |
| "transformer.layers.13.3.g": 1024, |
| "transformer.layers.13.4.ff.0.0.weight": 4194304, |
| "transformer.layers.13.4.ff.0.0.bias": 4096, |
| "transformer.layers.13.4.ff.2.weight": 4194304, |
| "transformer.layers.13.4.ff.2.bias": 1024, |
| "transformer.layers.14.0.weight": 2097152, |
| "transformer.layers.14.1.g": 1024, |
| "transformer.layers.14.2.to_q.weight": 1048576, |
| "transformer.layers.14.2.to_q.bias": 1024, |
| "transformer.layers.14.2.to_k.weight": 1048576, |
| "transformer.layers.14.2.to_k.bias": 1024, |
| "transformer.layers.14.2.to_v.weight": 1048576, |
| "transformer.layers.14.2.to_v.bias": 1024, |
| "transformer.layers.14.2.to_out.0.weight": 1048576, |
| "transformer.layers.14.2.to_out.0.bias": 1024, |
| "transformer.layers.14.3.g": 1024, |
| "transformer.layers.14.4.ff.0.0.weight": 4194304, |
| "transformer.layers.14.4.ff.0.0.bias": 4096, |
| "transformer.layers.14.4.ff.2.weight": 4194304, |
| "transformer.layers.14.4.ff.2.bias": 1024, |
| "transformer.layers.15.0.weight": 2097152, |
| "transformer.layers.15.1.g": 1024, |
| "transformer.layers.15.2.to_q.weight": 1048576, |
| "transformer.layers.15.2.to_q.bias": 1024, |
| "transformer.layers.15.2.to_k.weight": 1048576, |
| "transformer.layers.15.2.to_k.bias": 1024, |
| "transformer.layers.15.2.to_v.weight": 1048576, |
| "transformer.layers.15.2.to_v.bias": 1024, |
| "transformer.layers.15.2.to_out.0.weight": 1048576, |
| "transformer.layers.15.2.to_out.0.bias": 1024, |
| "transformer.layers.15.3.g": 1024, |
| "transformer.layers.15.4.ff.0.0.weight": 4194304, |
| "transformer.layers.15.4.ff.0.0.bias": 4096, |
| "transformer.layers.15.4.ff.2.weight": 4194304, |
| "transformer.layers.15.4.ff.2.bias": 1024, |
| "transformer.layers.16.0.weight": 2097152, |
| "transformer.layers.16.1.g": 1024, |
| "transformer.layers.16.2.to_q.weight": 1048576, |
| "transformer.layers.16.2.to_q.bias": 1024, |
| "transformer.layers.16.2.to_k.weight": 1048576, |
| "transformer.layers.16.2.to_k.bias": 1024, |
| "transformer.layers.16.2.to_v.weight": 1048576, |
| "transformer.layers.16.2.to_v.bias": 1024, |
| "transformer.layers.16.2.to_out.0.weight": 1048576, |
| "transformer.layers.16.2.to_out.0.bias": 1024, |
| "transformer.layers.16.3.g": 1024, |
| "transformer.layers.16.4.ff.0.0.weight": 4194304, |
| "transformer.layers.16.4.ff.0.0.bias": 4096, |
| "transformer.layers.16.4.ff.2.weight": 4194304, |
| "transformer.layers.16.4.ff.2.bias": 1024, |
| "transformer.layers.17.0.weight": 2097152, |
| "transformer.layers.17.1.g": 1024, |
| "transformer.layers.17.2.to_q.weight": 1048576, |
| "transformer.layers.17.2.to_q.bias": 1024, |
| "transformer.layers.17.2.to_k.weight": 1048576, |
| "transformer.layers.17.2.to_k.bias": 1024, |
| "transformer.layers.17.2.to_v.weight": 1048576, |
| "transformer.layers.17.2.to_v.bias": 1024, |
| "transformer.layers.17.2.to_out.0.weight": 1048576, |
| "transformer.layers.17.2.to_out.0.bias": 1024, |
| "transformer.layers.17.3.g": 1024, |
| "transformer.layers.17.4.ff.0.0.weight": 4194304, |
| "transformer.layers.17.4.ff.0.0.bias": 4096, |
| "transformer.layers.17.4.ff.2.weight": 4194304, |
| "transformer.layers.17.4.ff.2.bias": 1024, |
| "transformer.layers.18.0.weight": 2097152, |
| "transformer.layers.18.1.g": 1024, |
| "transformer.layers.18.2.to_q.weight": 1048576, |
| "transformer.layers.18.2.to_q.bias": 1024, |
| "transformer.layers.18.2.to_k.weight": 1048576, |
| "transformer.layers.18.2.to_k.bias": 1024, |
| "transformer.layers.18.2.to_v.weight": 1048576, |
| "transformer.layers.18.2.to_v.bias": 1024, |
| "transformer.layers.18.2.to_out.0.weight": 1048576, |
| "transformer.layers.18.2.to_out.0.bias": 1024, |
| "transformer.layers.18.3.g": 1024, |
| "transformer.layers.18.4.ff.0.0.weight": 4194304, |
| "transformer.layers.18.4.ff.0.0.bias": 4096, |
| "transformer.layers.18.4.ff.2.weight": 4194304, |
| "transformer.layers.18.4.ff.2.bias": 1024, |
| "transformer.layers.19.0.weight": 2097152, |
| "transformer.layers.19.1.g": 1024, |
| "transformer.layers.19.2.to_q.weight": 1048576, |
| "transformer.layers.19.2.to_q.bias": 1024, |
| "transformer.layers.19.2.to_k.weight": 1048576, |
| "transformer.layers.19.2.to_k.bias": 1024, |
| "transformer.layers.19.2.to_v.weight": 1048576, |
| "transformer.layers.19.2.to_v.bias": 1024, |
| "transformer.layers.19.2.to_out.0.weight": 1048576, |
| "transformer.layers.19.2.to_out.0.bias": 1024, |
| "transformer.layers.19.3.g": 1024, |
| "transformer.layers.19.4.ff.0.0.weight": 4194304, |
| "transformer.layers.19.4.ff.0.0.bias": 4096, |
| "transformer.layers.19.4.ff.2.weight": 4194304, |
| "transformer.layers.19.4.ff.2.bias": 1024, |
| "transformer.layers.20.0.weight": 2097152, |
| "transformer.layers.20.1.g": 1024, |
| "transformer.layers.20.2.to_q.weight": 1048576, |
| "transformer.layers.20.2.to_q.bias": 1024, |
| "transformer.layers.20.2.to_k.weight": 1048576, |
| "transformer.layers.20.2.to_k.bias": 1024, |
| "transformer.layers.20.2.to_v.weight": 1048576, |
| "transformer.layers.20.2.to_v.bias": 1024, |
| "transformer.layers.20.2.to_out.0.weight": 1048576, |
| "transformer.layers.20.2.to_out.0.bias": 1024, |
| "transformer.layers.20.3.g": 1024, |
| "transformer.layers.20.4.ff.0.0.weight": 4194304, |
| "transformer.layers.20.4.ff.0.0.bias": 4096, |
| "transformer.layers.20.4.ff.2.weight": 4194304, |
| "transformer.layers.20.4.ff.2.bias": 1024, |
| "transformer.layers.21.0.weight": 2097152, |
| "transformer.layers.21.1.g": 1024, |
| "transformer.layers.21.2.to_q.weight": 1048576, |
| "transformer.layers.21.2.to_q.bias": 1024, |
| "transformer.layers.21.2.to_k.weight": 1048576, |
| "transformer.layers.21.2.to_k.bias": 1024, |
| "transformer.layers.21.2.to_v.weight": 1048576, |
| "transformer.layers.21.2.to_v.bias": 1024, |
| "transformer.layers.21.2.to_out.0.weight": 1048576, |
| "transformer.layers.21.2.to_out.0.bias": 1024, |
| "transformer.layers.21.3.g": 1024, |
| "transformer.layers.21.4.ff.0.0.weight": 4194304, |
| "transformer.layers.21.4.ff.0.0.bias": 4096, |
| "transformer.layers.21.4.ff.2.weight": 4194304, |
| "transformer.layers.21.4.ff.2.bias": 1024, |
| "transformer.layers.22.0.weight": 2097152, |
| "transformer.layers.22.1.g": 1024, |
| "transformer.layers.22.2.to_q.weight": 1048576, |
| "transformer.layers.22.2.to_q.bias": 1024, |
| "transformer.layers.22.2.to_k.weight": 1048576, |
| "transformer.layers.22.2.to_k.bias": 1024, |
| "transformer.layers.22.2.to_v.weight": 1048576, |
| "transformer.layers.22.2.to_v.bias": 1024, |
| "transformer.layers.22.2.to_out.0.weight": 1048576, |
| "transformer.layers.22.2.to_out.0.bias": 1024, |
| "transformer.layers.22.3.g": 1024, |
| "transformer.layers.22.4.ff.0.0.weight": 4194304, |
| "transformer.layers.22.4.ff.0.0.bias": 4096, |
| "transformer.layers.22.4.ff.2.weight": 4194304, |
| "transformer.layers.22.4.ff.2.bias": 1024, |
| "transformer.layers.23.0.weight": 2097152, |
| "transformer.layers.23.1.g": 1024, |
| "transformer.layers.23.2.to_q.weight": 1048576, |
| "transformer.layers.23.2.to_q.bias": 1024, |
| "transformer.layers.23.2.to_k.weight": 1048576, |
| "transformer.layers.23.2.to_k.bias": 1024, |
| "transformer.layers.23.2.to_v.weight": 1048576, |
| "transformer.layers.23.2.to_v.bias": 1024, |
| "transformer.layers.23.2.to_out.0.weight": 1048576, |
| "transformer.layers.23.2.to_out.0.bias": 1024, |
| "transformer.layers.23.3.g": 1024, |
| "transformer.layers.23.4.ff.0.0.weight": 4194304, |
| "transformer.layers.23.4.ff.0.0.bias": 4096, |
| "transformer.layers.23.4.ff.2.weight": 4194304, |
| "transformer.layers.23.4.ff.2.bias": 1024, |
| "transformer.layers.24.0.weight": 2097152, |
| "transformer.layers.24.1.g": 1024, |
| "transformer.layers.24.2.to_q.weight": 1048576, |
| "transformer.layers.24.2.to_q.bias": 1024, |
| "transformer.layers.24.2.to_k.weight": 1048576, |
| "transformer.layers.24.2.to_k.bias": 1024, |
| "transformer.layers.24.2.to_v.weight": 1048576, |
| "transformer.layers.24.2.to_v.bias": 1024, |
| "transformer.layers.24.2.to_out.0.weight": 1048576, |
| "transformer.layers.24.2.to_out.0.bias": 1024, |
| "transformer.layers.24.3.g": 1024, |
| "transformer.layers.24.4.ff.0.0.weight": 4194304, |
| "transformer.layers.24.4.ff.0.0.bias": 4096, |
| "transformer.layers.24.4.ff.2.weight": 4194304, |
| "transformer.layers.24.4.ff.2.bias": 1024, |
| "transformer.layers.25.0.weight": 2097152, |
| "transformer.layers.25.1.g": 1024, |
| "transformer.layers.25.2.to_q.weight": 1048576, |
| "transformer.layers.25.2.to_q.bias": 1024, |
| "transformer.layers.25.2.to_k.weight": 1048576, |
| "transformer.layers.25.2.to_k.bias": 1024, |
| "transformer.layers.25.2.to_v.weight": 1048576, |
| "transformer.layers.25.2.to_v.bias": 1024, |
| "transformer.layers.25.2.to_out.0.weight": 1048576, |
| "transformer.layers.25.2.to_out.0.bias": 1024, |
| "transformer.layers.25.3.g": 1024, |
| "transformer.layers.25.4.ff.0.0.weight": 4194304, |
| "transformer.layers.25.4.ff.0.0.bias": 4096, |
| "transformer.layers.25.4.ff.2.weight": 4194304, |
| "transformer.layers.25.4.ff.2.bias": 1024, |
| "transformer.norm_out.g": 1024, |
| "transformer.proj_out.weight": 102400, |
| "transformer.proj_out.bias": 100 |
| }, |
| "important_layers": [ |
| "transformer.time_embed.time_mlp.0.weight", |
| "transformer.time_embed.time_mlp.2.weight", |
| "transformer.text_embed.text_embed.weight", |
| "transformer.input_embed.proj.weight", |
| "transformer.input_embed.conv_pos_embed.conv1d.0.weight", |
| "transformer.input_embed.conv_pos_embed.conv1d.2.weight", |
| "transformer.layers.0.2.to_q.weight", |
| "transformer.layers.0.2.to_k.weight", |
| "transformer.layers.0.2.to_v.weight", |
| "transformer.layers.0.2.to_out.0.weight", |
| "transformer.layers.0.4.ff.0.0.weight", |
| "transformer.layers.0.4.ff.2.weight", |
| "transformer.layers.1.2.to_q.weight", |
| "transformer.layers.1.2.to_k.weight", |
| "transformer.layers.1.2.to_v.weight", |
| "transformer.layers.1.2.to_out.0.weight", |
| "transformer.layers.1.4.ff.0.0.weight", |
| "transformer.layers.1.4.ff.2.weight", |
| "transformer.layers.2.2.to_q.weight", |
| "transformer.layers.2.2.to_k.weight", |
| "transformer.layers.2.2.to_v.weight", |
| "transformer.layers.2.2.to_out.0.weight", |
| "transformer.layers.2.4.ff.0.0.weight", |
| "transformer.layers.2.4.ff.2.weight", |
| "transformer.layers.3.2.to_q.weight", |
| "transformer.layers.3.2.to_k.weight", |
| "transformer.layers.3.2.to_v.weight", |
| "transformer.layers.3.2.to_out.0.weight", |
| "transformer.layers.3.4.ff.0.0.weight", |
| "transformer.layers.3.4.ff.2.weight", |
| "transformer.layers.4.2.to_q.weight", |
| "transformer.layers.4.2.to_k.weight", |
| "transformer.layers.4.2.to_v.weight", |
| "transformer.layers.4.2.to_out.0.weight", |
| "transformer.layers.4.4.ff.0.0.weight", |
| "transformer.layers.4.4.ff.2.weight", |
| "transformer.layers.5.2.to_q.weight", |
| "transformer.layers.5.2.to_k.weight", |
| "transformer.layers.5.2.to_v.weight", |
| "transformer.layers.5.2.to_out.0.weight", |
| "transformer.layers.5.4.ff.0.0.weight", |
| "transformer.layers.5.4.ff.2.weight", |
| "transformer.layers.6.2.to_q.weight", |
| "transformer.layers.6.2.to_k.weight", |
| "transformer.layers.6.2.to_v.weight", |
| "transformer.layers.6.2.to_out.0.weight", |
| "transformer.layers.6.4.ff.0.0.weight", |
| "transformer.layers.6.4.ff.2.weight", |
| "transformer.layers.7.2.to_q.weight", |
| "transformer.layers.7.2.to_k.weight", |
| "transformer.layers.7.2.to_v.weight", |
| "transformer.layers.7.2.to_out.0.weight", |
| "transformer.layers.7.4.ff.0.0.weight", |
| "transformer.layers.7.4.ff.2.weight", |
| "transformer.layers.8.4.ff.0.0.weight", |
| "transformer.layers.8.4.ff.2.weight", |
| "transformer.layers.9.4.ff.0.0.weight", |
| "transformer.layers.9.4.ff.2.weight", |
| "transformer.layers.10.4.ff.0.0.weight", |
| "transformer.layers.10.4.ff.2.weight", |
| "transformer.layers.11.4.ff.0.0.weight", |
| "transformer.layers.11.4.ff.2.weight", |
| "transformer.layers.12.4.ff.0.0.weight", |
| "transformer.layers.12.4.ff.2.weight", |
| "transformer.layers.13.0.weight", |
| "transformer.layers.13.4.ff.0.0.weight", |
| "transformer.layers.13.4.ff.2.weight", |
| "transformer.layers.14.0.weight", |
| "transformer.layers.14.4.ff.0.0.weight", |
| "transformer.layers.14.4.ff.2.weight", |
| "transformer.layers.15.0.weight", |
| "transformer.layers.15.4.ff.0.0.weight", |
| "transformer.layers.15.4.ff.2.weight", |
| "transformer.layers.16.4.ff.0.0.weight", |
| "transformer.layers.16.4.ff.2.weight", |
| "transformer.layers.17.4.ff.0.0.weight", |
| "transformer.layers.17.4.ff.2.weight", |
| "transformer.layers.18.4.ff.0.0.weight", |
| "transformer.layers.18.4.ff.2.weight", |
| "transformer.layers.19.4.ff.0.0.weight", |
| "transformer.layers.19.4.ff.2.weight", |
| "transformer.layers.20.4.ff.0.0.weight", |
| "transformer.layers.20.4.ff.2.weight", |
| "transformer.layers.21.4.ff.0.0.weight", |
| "transformer.layers.21.4.ff.2.weight", |
| "transformer.layers.22.4.ff.0.0.weight", |
| "transformer.layers.22.4.ff.2.weight", |
| "transformer.layers.23.4.ff.0.0.weight", |
| "transformer.layers.23.4.ff.2.weight", |
| "transformer.layers.24.4.ff.0.0.weight", |
| "transformer.layers.24.4.ff.2.weight", |
| "transformer.layers.25.4.ff.0.0.weight", |
| "transformer.layers.25.4.ff.2.weight" |
| ], |
| "bottleneck_layers": [], |
| "recommendations": { |
| "focus_layers": [ |
| "transformer.time_embed.time_mlp.0.weight", |
| "transformer.time_embed.time_mlp.2.weight", |
| "transformer.text_embed.text_embed.weight", |
| "transformer.input_embed.proj.weight", |
| "transformer.input_embed.conv_pos_embed.conv1d.0.weight", |
| "transformer.input_embed.conv_pos_embed.conv1d.2.weight", |
| "transformer.layers.0.2.to_q.weight", |
| "transformer.layers.0.2.to_k.weight", |
| "transformer.layers.0.2.to_v.weight", |
| "transformer.layers.0.2.to_out.0.weight", |
| "transformer.layers.0.4.ff.0.0.weight", |
| "transformer.layers.0.4.ff.2.weight", |
| "transformer.layers.1.2.to_q.weight", |
| "transformer.layers.1.2.to_k.weight", |
| "transformer.layers.1.2.to_v.weight", |
| "transformer.layers.1.2.to_out.0.weight", |
| "transformer.layers.1.4.ff.0.0.weight", |
| "transformer.layers.1.4.ff.2.weight", |
| "transformer.layers.2.2.to_q.weight", |
| "transformer.layers.2.2.to_k.weight", |
| "transformer.layers.2.2.to_v.weight", |
| "transformer.layers.2.2.to_out.0.weight", |
| "transformer.layers.2.4.ff.0.0.weight", |
| "transformer.layers.2.4.ff.2.weight", |
| "transformer.layers.3.2.to_q.weight", |
| "transformer.layers.3.2.to_k.weight", |
| "transformer.layers.3.2.to_v.weight", |
| "transformer.layers.3.2.to_out.0.weight", |
| "transformer.layers.3.4.ff.0.0.weight", |
| "transformer.layers.3.4.ff.2.weight", |
| "transformer.layers.4.2.to_q.weight", |
| "transformer.layers.4.2.to_k.weight", |
| "transformer.layers.4.2.to_v.weight", |
| "transformer.layers.4.2.to_out.0.weight", |
| "transformer.layers.4.4.ff.0.0.weight", |
| "transformer.layers.4.4.ff.2.weight", |
| "transformer.layers.5.2.to_q.weight", |
| "transformer.layers.5.2.to_k.weight", |
| "transformer.layers.5.2.to_v.weight", |
| "transformer.layers.5.2.to_out.0.weight", |
| "transformer.layers.5.4.ff.0.0.weight", |
| "transformer.layers.5.4.ff.2.weight", |
| "transformer.layers.6.2.to_q.weight", |
| "transformer.layers.6.2.to_k.weight", |
| "transformer.layers.6.2.to_v.weight", |
| "transformer.layers.6.2.to_out.0.weight", |
| "transformer.layers.6.4.ff.0.0.weight", |
| "transformer.layers.6.4.ff.2.weight", |
| "transformer.layers.7.2.to_q.weight", |
| "transformer.layers.7.2.to_k.weight", |
| "transformer.layers.7.2.to_v.weight", |
| "transformer.layers.7.2.to_out.0.weight", |
| "transformer.layers.7.4.ff.0.0.weight", |
| "transformer.layers.7.4.ff.2.weight", |
| "transformer.layers.8.4.ff.0.0.weight", |
| "transformer.layers.8.4.ff.2.weight", |
| "transformer.layers.9.4.ff.0.0.weight", |
| "transformer.layers.9.4.ff.2.weight", |
| "transformer.layers.10.4.ff.0.0.weight", |
| "transformer.layers.10.4.ff.2.weight", |
| "transformer.layers.11.4.ff.0.0.weight", |
| "transformer.layers.11.4.ff.2.weight", |
| "transformer.layers.12.4.ff.0.0.weight", |
| "transformer.layers.12.4.ff.2.weight", |
| "transformer.layers.13.0.weight", |
| "transformer.layers.13.4.ff.0.0.weight", |
| "transformer.layers.13.4.ff.2.weight", |
| "transformer.layers.14.0.weight", |
| "transformer.layers.14.4.ff.0.0.weight", |
| "transformer.layers.14.4.ff.2.weight", |
| "transformer.layers.15.0.weight", |
| "transformer.layers.15.4.ff.0.0.weight", |
| "transformer.layers.15.4.ff.2.weight", |
| "transformer.layers.16.4.ff.0.0.weight", |
| "transformer.layers.16.4.ff.2.weight", |
| "transformer.layers.17.4.ff.0.0.weight", |
| "transformer.layers.17.4.ff.2.weight", |
| "transformer.layers.18.4.ff.0.0.weight", |
| "transformer.layers.18.4.ff.2.weight", |
| "transformer.layers.19.4.ff.0.0.weight", |
| "transformer.layers.19.4.ff.2.weight", |
| "transformer.layers.20.4.ff.0.0.weight", |
| "transformer.layers.20.4.ff.2.weight", |
| "transformer.layers.21.4.ff.0.0.weight", |
| "transformer.layers.21.4.ff.2.weight", |
| "transformer.layers.22.4.ff.0.0.weight", |
| "transformer.layers.22.4.ff.2.weight", |
| "transformer.layers.23.4.ff.0.0.weight", |
| "transformer.layers.23.4.ff.2.weight", |
| "transformer.layers.24.4.ff.0.0.weight", |
| "transformer.layers.24.4.ff.2.weight", |
| "transformer.layers.25.4.ff.0.0.weight", |
| "transformer.layers.25.4.ff.2.weight" |
| ] |
| }, |
| "total_parameters": 391, |
| "total_elements": 360755948, |
| "param_ranges": { |
| "transformer.time_embed.time_mlp.0.weight": { |
| "min": -0.43111443519592285, |
| "max": 0.2988463342189789, |
| "mean": -0.0025462331250309944, |
| "std": 0.04255734384059906, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 256 |
| ] |
| }, |
| "transformer.time_embed.time_mlp.0.bias": { |
| "min": -0.06311740726232529, |
| "max": 0.10821832716464996, |
| "mean": 0.0006233985768631101, |
| "std": 0.03409506380558014, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.time_embed.time_mlp.2.weight": { |
| "min": -0.41270628571510315, |
| "max": 0.8365904092788696, |
| "mean": -0.0002062078274320811, |
| "std": 0.024108584970235825, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.time_embed.time_mlp.2.bias": { |
| "min": -0.11594842374324799, |
| "max": 0.323304146528244, |
| "mean": -0.0009396584937348962, |
| "std": 0.019620178267359734, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.text_embed.text_embed.weight": { |
| "min": -2.8046321868896484, |
| "max": 2.8845088481903076, |
| "mean": -0.00036305765388533473, |
| "std": 0.615403413772583, |
| "sparsity": 0.0, |
| "shape": [ |
| 2546, |
| 100 |
| ] |
| }, |
| "transformer.input_embed.proj.weight": { |
| "min": -0.2803097069263458, |
| "max": 0.3821697235107422, |
| "mean": 0.0004250165948178619, |
| "std": 0.042748384177684784, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 300 |
| ] |
| }, |
| "transformer.input_embed.proj.bias": { |
| "min": -0.22351907193660736, |
| "max": 0.21069680154323578, |
| "mean": -0.004498748108744621, |
| "std": 0.04097301885485649, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.input_embed.conv_pos_embed.conv1d.0.weight": { |
| "min": -0.4281409978866577, |
| "max": 0.47565823793411255, |
| "mean": 3.041478066734271e-06, |
| "std": 0.024508286267518997, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 64, |
| 31 |
| ] |
| }, |
| "transformer.input_embed.conv_pos_embed.conv1d.0.bias": { |
| "min": -0.32690364122390747, |
| "max": 0.15677706897258759, |
| "mean": -0.04671286791563034, |
| "std": 0.05161474645137787, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.input_embed.conv_pos_embed.conv1d.2.weight": { |
| "min": -0.41106897592544556, |
| "max": 0.3550392687320709, |
| "mean": -0.00012950549717061222, |
| "std": 0.023600473999977112, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 64, |
| 31 |
| ] |
| }, |
| "transformer.input_embed.conv_pos_embed.conv1d.2.bias": { |
| "min": -0.23076964914798737, |
| "max": 0.2638300061225891, |
| "mean": -0.029151970520615578, |
| "std": 0.049401458352804184, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.0.1.g": { |
| "min": 0.25456827878952026, |
| "max": 0.8219638466835022, |
| "mean": 0.525442898273468, |
| "std": 0.08086482435464859, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.0.2.to_q.weight": { |
| "min": -0.2974269390106201, |
| "max": 0.26618602871894836, |
| "mean": -0.0004250289057381451, |
| "std": 0.0321008674800396, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.0.2.to_q.bias": { |
| "min": -0.09282378107309341, |
| "max": 0.12510952353477478, |
| "mean": 0.0006503364420495927, |
| "std": 0.025732681155204773, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.0.2.to_k.weight": { |
| "min": -0.29088306427001953, |
| "max": 0.28188201785087585, |
| "mean": -7.563710096292198e-05, |
| "std": 0.030931729823350906, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.0.2.to_k.bias": { |
| "min": -5.909866809844971, |
| "max": 5.824496746063232, |
| "mean": -0.009385589510202408, |
| "std": 1.2966406345367432, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.0.2.to_v.weight": { |
| "min": -0.4253852665424347, |
| "max": 0.34430131316185, |
| "mean": 9.75119328359142e-05, |
| "std": 0.02995217591524124, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.0.2.to_v.bias": { |
| "min": -0.028903231024742126, |
| "max": 0.027659673243761063, |
| "mean": -0.00031527443206869066, |
| "std": 0.012571859173476696, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.0.2.to_out.0.weight": { |
| "min": -0.45454347133636475, |
| "max": 0.44891107082366943, |
| "mean": 2.3480326490243897e-05, |
| "std": 0.023853568360209465, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.0.2.to_out.0.bias": { |
| "min": -0.08878406882286072, |
| "max": 0.09124661237001419, |
| "mean": 0.002279076725244522, |
| "std": 0.019516194239258766, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.0.3.g": { |
| "min": 0.2667350471019745, |
| "max": 1.0590577125549316, |
| "mean": 0.5311722159385681, |
| "std": 0.10455667227506638, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.0.4.ff.0.0.weight": { |
| "min": -0.5753205418586731, |
| "max": 0.6092038154602051, |
| "mean": -0.0004317538405302912, |
| "std": 0.038596246391534805, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.0.4.ff.0.0.bias": { |
| "min": -0.18242540955543518, |
| "max": 0.04575135558843613, |
| "mean": -0.02945941686630249, |
| "std": 0.04261056333780289, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.0.4.ff.2.weight": { |
| "min": -1.167878270149231, |
| "max": 1.6351370811462402, |
| "mean": 0.00032057490898296237, |
| "std": 0.02769383229315281, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.0.4.ff.2.bias": { |
| "min": -0.1625949591398239, |
| "max": 0.2059435099363327, |
| "mean": -0.02112039364874363, |
| "std": 0.027941575273871422, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.1.1.g": { |
| "min": 0.22422762215137482, |
| "max": 0.8458681702613831, |
| "mean": 0.4875890910625458, |
| "std": 0.07528901100158691, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.1.2.to_q.weight": { |
| "min": -0.2560153305530548, |
| "max": 0.3063727021217346, |
| "mean": -8.626433555036783e-06, |
| "std": 0.033470120280981064, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.1.2.to_q.bias": { |
| "min": -0.09546571969985962, |
| "max": 0.11066073924303055, |
| "mean": 5.8840945712290704e-05, |
| "std": 0.026972563937306404, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.1.2.to_k.weight": { |
| "min": -0.2978975474834442, |
| "max": 0.29693126678466797, |
| "mean": 5.199259248911403e-05, |
| "std": 0.03254008665680885, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.1.2.to_k.bias": { |
| "min": -5.169106960296631, |
| "max": 5.089260578155518, |
| "mean": -0.014622640796005726, |
| "std": 1.1580101251602173, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.1.2.to_v.weight": { |
| "min": -0.3452591896057129, |
| "max": 0.3437287509441376, |
| "mean": 7.87251628935337e-05, |
| "std": 0.030058259144425392, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.1.2.to_v.bias": { |
| "min": -0.03609376400709152, |
| "max": 0.03314271569252014, |
| "mean": -0.00014089577598497272, |
| "std": 0.013021372258663177, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.1.2.to_out.0.weight": { |
| "min": -0.3159167468547821, |
| "max": 0.37570273876190186, |
| "mean": -2.126370236510411e-05, |
| "std": 0.024055330082774162, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.1.2.to_out.0.bias": { |
| "min": -0.10549593716859818, |
| "max": 0.1221165731549263, |
| "mean": -0.0019639446400105953, |
| "std": 0.028849009424448013, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.1.3.g": { |
| "min": 0.3116210103034973, |
| "max": 1.1235315799713135, |
| "mean": 0.6662613153457642, |
| "std": 0.09780054539442062, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.1.4.ff.0.0.weight": { |
| "min": -0.872847855091095, |
| "max": 0.6278241872787476, |
| "mean": 0.0016755674732849002, |
| "std": 0.047437313944101334, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.1.4.ff.0.0.bias": { |
| "min": -0.2716394066810608, |
| "max": 0.03413696587085724, |
| "mean": -0.0466003455221653, |
| "std": 0.04061445966362953, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.1.4.ff.2.weight": { |
| "min": -0.9222021102905273, |
| "max": 0.9650114178657532, |
| "mean": 0.0010224997531622648, |
| "std": 0.04070303216576576, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.1.4.ff.2.bias": { |
| "min": -0.14480018615722656, |
| "max": 0.07504245638847351, |
| "mean": -0.00909046083688736, |
| "std": 0.025704393163323402, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.2.1.g": { |
| "min": 0.23979389667510986, |
| "max": 0.7145018577575684, |
| "mean": 0.4472465217113495, |
| "std": 0.059433478862047195, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.2.2.to_q.weight": { |
| "min": -0.2733098268508911, |
| "max": 0.2983761131763458, |
| "mean": 9.066419806913473e-06, |
| "std": 0.03547072410583496, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.2.2.to_q.bias": { |
| "min": -0.11928554624319077, |
| "max": 0.11867407709360123, |
| "mean": 0.0007565614068880677, |
| "std": 0.02763325348496437, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.2.2.to_k.weight": { |
| "min": -0.28173530101776123, |
| "max": 0.2804112136363983, |
| "mean": -7.68975296523422e-05, |
| "std": 0.03510041534900665, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.2.2.to_k.bias": { |
| "min": -2.51193904876709, |
| "max": 2.5239455699920654, |
| "mean": 0.026779357343912125, |
| "std": 0.5869050621986389, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.2.2.to_v.weight": { |
| "min": -0.2215055674314499, |
| "max": 0.2721182703971863, |
| "mean": 2.8998874768149108e-06, |
| "std": 0.030730824917554855, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.2.2.to_v.bias": { |
| "min": -0.03334304690361023, |
| "max": 0.031320393085479736, |
| "mean": 0.00011074724898207933, |
| "std": 0.012403324246406555, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.2.2.to_out.0.weight": { |
| "min": -0.23567309975624084, |
| "max": 0.2320062220096588, |
| "mean": 5.707715899916366e-05, |
| "std": 0.025695981457829475, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.2.2.to_out.0.bias": { |
| "min": -0.13582320511341095, |
| "max": 0.1279149055480957, |
| "mean": -0.005496869329363108, |
| "std": 0.03996486961841583, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.2.3.g": { |
| "min": 0.3545507788658142, |
| "max": 1.1755321025848389, |
| "mean": 0.7105286121368408, |
| "std": 0.10380106419324875, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.2.4.ff.0.0.weight": { |
| "min": -0.618323802947998, |
| "max": 0.5557036995887756, |
| "mean": 0.0011603902094066143, |
| "std": 0.046115029603242874, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.2.4.ff.0.0.bias": { |
| "min": -0.18935386836528778, |
| "max": 0.024935415014624596, |
| "mean": -0.03484790399670601, |
| "std": 0.028624996542930603, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.2.4.ff.2.weight": { |
| "min": -1.1329621076583862, |
| "max": 0.9724080562591553, |
| "mean": 0.00035803488572128117, |
| "std": 0.042342979460954666, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.2.4.ff.2.bias": { |
| "min": -0.5985916256904602, |
| "max": 0.06294681131839752, |
| "mean": -0.0048767137341201305, |
| "std": 0.028625035658478737, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.3.1.g": { |
| "min": 0.37523797154426575, |
| "max": 0.9426477551460266, |
| "mean": 0.5925332903862, |
| "std": 0.06714636832475662, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.3.2.to_q.weight": { |
| "min": -0.3922964930534363, |
| "max": 0.37001147866249084, |
| "mean": 7.055637979647145e-05, |
| "std": 0.03718561306595802, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.3.2.to_q.bias": { |
| "min": -0.11894690245389938, |
| "max": 0.13649211823940277, |
| "mean": 0.0009205802925862372, |
| "std": 0.029216548427939415, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.3.2.to_k.weight": { |
| "min": -0.6203529834747314, |
| "max": 0.509852409362793, |
| "mean": 1.5258530766004696e-05, |
| "std": 0.03643907234072685, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.3.2.to_k.bias": { |
| "min": -8.195601463317871, |
| "max": 8.798324584960938, |
| "mean": -0.10935366153717041, |
| "std": 1.6999714374542236, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.3.2.to_v.weight": { |
| "min": -0.27709993720054626, |
| "max": 0.24029740691184998, |
| "mean": 5.252830669633113e-05, |
| "std": 0.032612841576337814, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.3.2.to_v.bias": { |
| "min": -0.05198528617620468, |
| "max": 0.03960206359624863, |
| "mean": 8.789013372734189e-05, |
| "std": 0.012959298677742481, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.3.2.to_out.0.weight": { |
| "min": -0.23129259049892426, |
| "max": 0.23536467552185059, |
| "mean": -2.1845989977009594e-05, |
| "std": 0.029389241710305214, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.3.2.to_out.0.bias": { |
| "min": -0.2045416533946991, |
| "max": 0.10547658056020737, |
| "mean": -0.004024041350930929, |
| "std": 0.03263028338551521, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.3.3.g": { |
| "min": 0.33950191736221313, |
| "max": 1.0151382684707642, |
| "mean": 0.7007080316543579, |
| "std": 0.09671688079833984, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.3.4.ff.0.0.weight": { |
| "min": -0.5657932162284851, |
| "max": 0.8349727988243103, |
| "mean": 0.00041512559982948005, |
| "std": 0.04229608178138733, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.3.4.ff.0.0.bias": { |
| "min": -0.21222105622291565, |
| "max": 0.030380746349692345, |
| "mean": -0.03218400478363037, |
| "std": 0.026512378826737404, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.3.4.ff.2.weight": { |
| "min": -0.7566999793052673, |
| "max": 0.7205860018730164, |
| "mean": -1.3569264410762116e-05, |
| "std": 0.036836523562669754, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.3.4.ff.2.bias": { |
| "min": -0.2636493444442749, |
| "max": 0.10622138530015945, |
| "mean": -0.0030191433615982533, |
| "std": 0.0288657546043396, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.4.1.g": { |
| "min": 0.284244179725647, |
| "max": 0.6968931555747986, |
| "mean": 0.49943026900291443, |
| "std": 0.046561453491449356, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.4.2.to_q.weight": { |
| "min": -0.27927035093307495, |
| "max": 0.23469851911067963, |
| "mean": -0.00011116769746877253, |
| "std": 0.038758207112550735, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.4.2.to_q.bias": { |
| "min": -0.15420791506767273, |
| "max": 0.12671181559562683, |
| "mean": -0.002232905477285385, |
| "std": 0.03338504582643509, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.4.2.to_k.weight": { |
| "min": -0.41528424620628357, |
| "max": 0.6604220271110535, |
| "mean": -1.9215509382775053e-05, |
| "std": 0.03909698873758316, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.4.2.to_k.bias": { |
| "min": -4.243428707122803, |
| "max": 4.728596210479736, |
| "mean": -0.020457647740840912, |
| "std": 1.0080652236938477, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.4.2.to_v.weight": { |
| "min": -0.24574802815914154, |
| "max": 0.20800377428531647, |
| "mean": 4.4111799070378765e-05, |
| "std": 0.0339629240334034, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.4.2.to_v.bias": { |
| "min": -0.03446226194500923, |
| "max": 0.04489393159747124, |
| "mean": -1.5458615962415934e-05, |
| "std": 0.012629742734134197, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.4.2.to_out.0.weight": { |
| "min": -0.2015937864780426, |
| "max": 0.20673099160194397, |
| "mean": -2.9244030884001404e-05, |
| "std": 0.03102072887122631, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.4.2.to_out.0.bias": { |
| "min": -0.20010024309158325, |
| "max": 0.11358015239238739, |
| "mean": -0.0029013892635703087, |
| "std": 0.03451463207602501, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.4.3.g": { |
| "min": 0.36685705184936523, |
| "max": 1.0600172281265259, |
| "mean": 0.6705178022384644, |
| "std": 0.06640052795410156, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.4.4.ff.0.0.weight": { |
| "min": -0.39914920926094055, |
| "max": 0.5031230449676514, |
| "mean": -3.865663893520832e-05, |
| "std": 0.04113178327679634, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.4.4.ff.0.0.bias": { |
| "min": -0.12865233421325684, |
| "max": 0.026885882019996643, |
| "mean": -0.030540671199560165, |
| "std": 0.02188955619931221, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.4.4.ff.2.weight": { |
| "min": -0.4503399133682251, |
| "max": 0.4341718554496765, |
| "mean": 7.837524026399478e-05, |
| "std": 0.03489154577255249, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.4.4.ff.2.bias": { |
| "min": -0.2677534520626068, |
| "max": 0.07295451313257217, |
| "mean": -0.0010977284982800484, |
| "std": 0.023126663640141487, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.5.1.g": { |
| "min": 0.28732216358184814, |
| "max": 0.687613844871521, |
| "mean": 0.5245327353477478, |
| "std": 0.047577910125255585, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.5.2.to_q.weight": { |
| "min": -0.22290916740894318, |
| "max": 0.22416770458221436, |
| "mean": 1.5896670447546057e-05, |
| "std": 0.03894934430718422, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.5.2.to_q.bias": { |
| "min": -0.13659609854221344, |
| "max": 0.10938586294651031, |
| "mean": 0.0002443990088067949, |
| "std": 0.029240434989333153, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.5.2.to_k.weight": { |
| "min": -0.37579256296157837, |
| "max": 0.43812817335128784, |
| "mean": -9.537441655993462e-06, |
| "std": 0.03928641602396965, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.5.2.to_k.bias": { |
| "min": -3.8499395847320557, |
| "max": 5.004647254943848, |
| "mean": 0.009758757427334785, |
| "std": 0.8455180525779724, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.5.2.to_v.weight": { |
| "min": -0.2236318439245224, |
| "max": 0.22071507573127747, |
| "mean": -4.0232407627627254e-07, |
| "std": 0.034410055726766586, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.5.2.to_v.bias": { |
| "min": -0.04383794590830803, |
| "max": 0.03584868088364601, |
| "mean": -0.00026072480250149965, |
| "std": 0.012076611630618572, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.5.2.to_out.0.weight": { |
| "min": -0.21360361576080322, |
| "max": 0.1891404688358307, |
| "mean": -1.7133981600636616e-05, |
| "std": 0.03153670206665993, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.5.2.to_out.0.bias": { |
| "min": -0.18102218210697174, |
| "max": 0.12101027369499207, |
| "mean": -0.002398766577243805, |
| "std": 0.04126044735312462, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.5.3.g": { |
| "min": 0.422617107629776, |
| "max": 0.9454182982444763, |
| "mean": 0.6626853942871094, |
| "std": 0.05683305859565735, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.5.4.ff.0.0.weight": { |
| "min": -0.3716322183609009, |
| "max": 0.47696027159690857, |
| "mean": -8.185259503079578e-05, |
| "std": 0.040890805423259735, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.5.4.ff.0.0.bias": { |
| "min": -0.2088262289762497, |
| "max": 0.027207661420106888, |
| "mean": -0.03023664839565754, |
| "std": 0.021368583664298058, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.5.4.ff.2.weight": { |
| "min": -0.3415319621562958, |
| "max": 0.735925555229187, |
| "mean": 8.314158185385168e-05, |
| "std": 0.034767184406518936, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.5.4.ff.2.bias": { |
| "min": -0.24044273793697357, |
| "max": 0.05069386586546898, |
| "mean": -0.0011902841506525874, |
| "std": 0.020465629175305367, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.6.1.g": { |
| "min": 0.30604928731918335, |
| "max": 0.6555026769638062, |
| "mean": 0.5250788331031799, |
| "std": 0.04609908536076546, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.6.2.to_q.weight": { |
| "min": -0.3050762414932251, |
| "max": 0.21783104538917542, |
| "mean": 6.997165473876521e-05, |
| "std": 0.039496470242738724, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.6.2.to_q.bias": { |
| "min": -0.14947636425495148, |
| "max": 0.13131970167160034, |
| "mean": 0.00033609665115363896, |
| "std": 0.03047223575413227, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.6.2.to_k.weight": { |
| "min": -0.2578710615634918, |
| "max": 0.20255950093269348, |
| "mean": 3.1238341762218624e-05, |
| "std": 0.03948673978447914, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.6.2.to_k.bias": { |
| "min": -2.339573621749878, |
| "max": 2.379251480102539, |
| "mean": -0.02625335566699505, |
| "std": 0.4500052034854889, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.6.2.to_v.weight": { |
| "min": -0.1892782300710678, |
| "max": 0.21099112927913666, |
| "mean": 3.7314141081878915e-05, |
| "std": 0.03479423746466637, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.6.2.to_v.bias": { |
| "min": -0.03169188275933266, |
| "max": 0.03571836277842522, |
| "mean": -0.00019686334417201579, |
| "std": 0.012292133644223213, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.6.2.to_out.0.weight": { |
| "min": -0.1888986974954605, |
| "max": 0.17091436684131622, |
| "mean": -6.82127574691549e-05, |
| "std": 0.032170820981264114, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.6.2.to_out.0.bias": { |
| "min": -0.13952063024044037, |
| "max": 0.13709284365177155, |
| "mean": -0.0025128263514488935, |
| "std": 0.0512898713350296, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.6.3.g": { |
| "min": 0.4670536518096924, |
| "max": 0.9585899710655212, |
| "mean": 0.6689007878303528, |
| "std": 0.05285040661692619, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.6.4.ff.0.0.weight": { |
| "min": -0.3248884379863739, |
| "max": 0.3098326325416565, |
| "mean": -1.0356043276260607e-06, |
| "std": 0.04094681516289711, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.6.4.ff.0.0.bias": { |
| "min": -0.12497521936893463, |
| "max": 0.02554607018828392, |
| "mean": -0.030699055641889572, |
| "std": 0.019824611023068428, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.6.4.ff.2.weight": { |
| "min": -0.4409962594509125, |
| "max": 0.44632241129875183, |
| "mean": 9.430450154468417e-05, |
| "std": 0.03512001410126686, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.6.4.ff.2.bias": { |
| "min": -0.22476668655872345, |
| "max": 0.051897041499614716, |
| "mean": -0.0011790284188464284, |
| "std": 0.018472088500857353, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.7.1.g": { |
| "min": 0.3393557369709015, |
| "max": 0.7416696548461914, |
| "mean": 0.5586937069892883, |
| "std": 0.04142747446894646, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.7.2.to_q.weight": { |
| "min": -0.2734062075614929, |
| "max": 0.2793632745742798, |
| "mean": 2.0294006390031427e-05, |
| "std": 0.04105808213353157, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.7.2.to_q.bias": { |
| "min": -0.13707204163074493, |
| "max": 0.14009879529476166, |
| "mean": 0.0004904167726635933, |
| "std": 0.02664206363260746, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.7.2.to_k.weight": { |
| "min": -0.49139103293418884, |
| "max": 0.35644298791885376, |
| "mean": 8.893347694538534e-05, |
| "std": 0.04069600626826286, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.7.2.to_k.bias": { |
| "min": -2.2994801998138428, |
| "max": 1.7469841241836548, |
| "mean": -0.021084124222397804, |
| "std": 0.500186562538147, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.7.2.to_v.weight": { |
| "min": -0.2184700220823288, |
| "max": 0.1981830596923828, |
| "mean": -4.060107676195912e-05, |
| "std": 0.03423382714390755, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.7.2.to_v.bias": { |
| "min": -0.04127173125743866, |
| "max": 0.03881501033902168, |
| "mean": -0.00013771075464319438, |
| "std": 0.012880227528512478, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.7.2.to_out.0.weight": { |
| "min": -0.17825232446193695, |
| "max": 0.18374156951904297, |
| "mean": 4.785084456671029e-05, |
| "std": 0.031557004898786545, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.7.2.to_out.0.bias": { |
| "min": -0.18023589253425598, |
| "max": 0.18417657911777496, |
| "mean": -0.002215688582509756, |
| "std": 0.05483615770936012, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.7.3.g": { |
| "min": 0.4742925763130188, |
| "max": 1.0284452438354492, |
| "mean": 0.6453101634979248, |
| "std": 0.05053440108895302, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.7.4.ff.0.0.weight": { |
| "min": -0.27223968505859375, |
| "max": 0.30990350246429443, |
| "mean": 0.00011251836258452386, |
| "std": 0.04068317264318466, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.7.4.ff.0.0.bias": { |
| "min": -0.10583628714084625, |
| "max": 0.02672600746154785, |
| "mean": -0.02951621636748314, |
| "std": 0.01793462224304676, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.7.4.ff.2.weight": { |
| "min": -0.339975506067276, |
| "max": 0.3303821086883545, |
| "mean": 5.460641114041209e-05, |
| "std": 0.034413956105709076, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.7.4.ff.2.bias": { |
| "min": -0.1819038987159729, |
| "max": 0.0424266941845417, |
| "mean": -0.0010654201032593846, |
| "std": 0.01721329055726528, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.8.1.g": { |
| "min": 0.3252944052219391, |
| "max": 0.688383936882019, |
| "mean": 0.5112100839614868, |
| "std": 0.036942265927791595, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.8.2.to_q.weight": { |
| "min": -0.2345394641160965, |
| "max": 0.22607795894145966, |
| "mean": -3.624632518040016e-05, |
| "std": 0.039177343249320984, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.8.2.to_q.bias": { |
| "min": -0.11556069552898407, |
| "max": 0.13209758698940277, |
| "mean": 0.00015118884039111435, |
| "std": 0.029196659103035927, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.8.2.to_k.weight": { |
| "min": -0.3532617390155792, |
| "max": 0.2856779992580414, |
| "mean": 7.000558980507776e-06, |
| "std": 0.0392458438873291, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.8.2.to_k.bias": { |
| "min": -4.136237621307373, |
| "max": 3.547076940536499, |
| "mean": -0.011597944423556328, |
| "std": 0.6828959584236145, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.8.2.to_v.weight": { |
| "min": -0.21137045323848724, |
| "max": 0.20969942212104797, |
| "mean": 3.464317342150025e-05, |
| "std": 0.03448577970266342, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.8.2.to_v.bias": { |
| "min": -0.03584721311926842, |
| "max": 0.048106979578733444, |
| "mean": 0.0007941541844047606, |
| "std": 0.012865344993770123, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.8.2.to_out.0.weight": { |
| "min": -0.2109234631061554, |
| "max": 0.19350647926330566, |
| "mean": -1.076167109204107e-06, |
| "std": 0.03169678896665573, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.8.2.to_out.0.bias": { |
| "min": -0.18694967031478882, |
| "max": 0.17746947705745697, |
| "mean": -0.002843617694452405, |
| "std": 0.0586174838244915, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.8.3.g": { |
| "min": 0.474641889333725, |
| "max": 1.0443058013916016, |
| "mean": 0.6514294147491455, |
| "std": 0.0498916432261467, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.8.4.ff.0.0.weight": { |
| "min": -0.24857543408870697, |
| "max": 0.3296365737915039, |
| "mean": 0.00018093036487698555, |
| "std": 0.040571410208940506, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.8.4.ff.0.0.bias": { |
| "min": -0.12483743578195572, |
| "max": 0.024654541164636612, |
| "mean": -0.030496058985590935, |
| "std": 0.01760769635438919, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.8.4.ff.2.weight": { |
| "min": -0.4221171438694, |
| "max": 0.4831203818321228, |
| "mean": 1.3900153135182336e-06, |
| "std": 0.03539836406707764, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.8.4.ff.2.bias": { |
| "min": -0.15169401466846466, |
| "max": 0.043601393699645996, |
| "mean": 4.186587466392666e-05, |
| "std": 0.014870981685817242, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.9.1.g": { |
| "min": 0.31570297479629517, |
| "max": 0.6836181879043579, |
| "mean": 0.5528991222381592, |
| "std": 0.04067207872867584, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.9.2.to_q.weight": { |
| "min": -0.20685237646102905, |
| "max": 0.22020350396633148, |
| "mean": 3.1496565497945994e-05, |
| "std": 0.038300175219774246, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.9.2.to_q.bias": { |
| "min": -0.13801881670951843, |
| "max": 0.1128397211432457, |
| "mean": 1.9543484086170793e-05, |
| "std": 0.02582789771258831, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.9.2.to_k.weight": { |
| "min": -0.4035792350769043, |
| "max": 0.37189632654190063, |
| "mean": 2.57877072726842e-05, |
| "std": 0.03818116337060928, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.9.2.to_k.bias": { |
| "min": -3.776683807373047, |
| "max": 2.873103380203247, |
| "mean": 0.0011591403745114803, |
| "std": 0.5172097086906433, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.9.2.to_v.weight": { |
| "min": -0.20364898443222046, |
| "max": 0.19804270565509796, |
| "mean": 2.963895894936286e-05, |
| "std": 0.03429786115884781, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.9.2.to_v.bias": { |
| "min": -0.05086854100227356, |
| "max": 0.03999151289463043, |
| "mean": -0.00042562291491776705, |
| "std": 0.01342119462788105, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.9.2.to_out.0.weight": { |
| "min": -0.19656670093536377, |
| "max": 0.20230703055858612, |
| "mean": -1.2472472008084878e-05, |
| "std": 0.031806014478206635, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.9.2.to_out.0.bias": { |
| "min": -0.19329077005386353, |
| "max": 0.1953459531068802, |
| "mean": -0.002963340375572443, |
| "std": 0.06254669278860092, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.9.3.g": { |
| "min": 0.3491152226924896, |
| "max": 1.0867162942886353, |
| "mean": 0.6672079563140869, |
| "std": 0.055482182651758194, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.9.4.ff.0.0.weight": { |
| "min": -0.22604526579380035, |
| "max": 0.25199154019355774, |
| "mean": 0.00035888003185391426, |
| "std": 0.04076085984706879, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.9.4.ff.0.0.bias": { |
| "min": -0.09107685089111328, |
| "max": 0.043750207871198654, |
| "mean": -0.030080880969762802, |
| "std": 0.017612501978874207, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.9.4.ff.2.weight": { |
| "min": -0.354022353887558, |
| "max": 0.3047710955142975, |
| "mean": -4.505186007008888e-05, |
| "std": 0.03712347894906998, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.9.4.ff.2.bias": { |
| "min": -0.16208632290363312, |
| "max": 0.06347470730543137, |
| "mean": -7.683466537855566e-05, |
| "std": 0.01941368170082569, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.10.1.g": { |
| "min": 0.34881117939949036, |
| "max": 0.7244766354560852, |
| "mean": 0.5423683524131775, |
| "std": 0.039119552820920944, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.10.2.to_q.weight": { |
| "min": -0.21985284984111786, |
| "max": 0.22366879880428314, |
| "mean": -1.1181864465470426e-05, |
| "std": 0.03923165425658226, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.10.2.to_q.bias": { |
| "min": -0.11856226623058319, |
| "max": 0.17077098786830902, |
| "mean": 0.0002904185967054218, |
| "std": 0.025113951414823532, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.10.2.to_k.weight": { |
| "min": -0.24732813239097595, |
| "max": 0.30149152874946594, |
| "mean": -3.663568713818677e-05, |
| "std": 0.03893101587891579, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.10.2.to_k.bias": { |
| "min": -3.509943962097168, |
| "max": 3.719674825668335, |
| "mean": 0.015853645280003548, |
| "std": 0.7831405401229858, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.10.2.to_v.weight": { |
| "min": -0.21940433979034424, |
| "max": 0.2380109429359436, |
| "mean": -1.3181561371311545e-05, |
| "std": 0.036304209381341934, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.10.2.to_v.bias": { |
| "min": -0.04728918895125389, |
| "max": 0.05147355794906616, |
| "mean": 0.00047950932639651, |
| "std": 0.01351844146847725, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.10.2.to_out.0.weight": { |
| "min": -0.21457946300506592, |
| "max": 0.21772831678390503, |
| "mean": 5.6543191021773964e-05, |
| "std": 0.03361648693680763, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.10.2.to_out.0.bias": { |
| "min": -0.21175915002822876, |
| "max": 0.2316361367702484, |
| "mean": -0.005104508716613054, |
| "std": 0.06187352165579796, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.10.3.g": { |
| "min": 0.36198312044143677, |
| "max": 1.1043850183486938, |
| "mean": 0.6993494629859924, |
| "std": 0.0538649819791317, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.10.4.ff.0.0.weight": { |
| "min": -0.23541490733623505, |
| "max": 0.24545514583587646, |
| "mean": 0.0004635048389900476, |
| "std": 0.0412699356675148, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.10.4.ff.0.0.bias": { |
| "min": -0.09819761663675308, |
| "max": 0.06812109053134918, |
| "mean": -0.03143283352255821, |
| "std": 0.018124457448720932, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.10.4.ff.2.weight": { |
| "min": -0.302616149187088, |
| "max": 0.3526079058647156, |
| "mean": -8.239349699579179e-05, |
| "std": 0.04027572274208069, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.10.4.ff.2.bias": { |
| "min": -0.1525425761938095, |
| "max": 0.14988082647323608, |
| "mean": 0.00025950101553462446, |
| "std": 0.02303888648748398, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.11.1.g": { |
| "min": 0.9994731545448303, |
| "max": 1.0051331520080566, |
| "mean": 1.0006828308105469, |
| "std": 0.0018997839652001858, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.11.2.to_q.weight": { |
| "min": -0.031253598630428314, |
| "max": 0.03125074878334999, |
| "mean": -1.9291795979370363e-05, |
| "std": 0.018041806295514107, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.11.2.to_q.bias": { |
| "min": -0.031226053833961487, |
| "max": 0.030990969389677048, |
| "mean": -0.0010842140763998032, |
| "std": 0.01795150525867939, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.11.2.to_k.weight": { |
| "min": -0.03125230595469475, |
| "max": 0.031255852431058884, |
| "mean": 3.5468428905005567e-06, |
| "std": 0.01804220862686634, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.11.2.to_k.bias": { |
| "min": -0.031155752018094063, |
| "max": 0.031177222728729248, |
| "mean": 0.0003338717215228826, |
| "std": 0.018063681200146675, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.11.2.to_v.weight": { |
| "min": 0.0, |
| "max": 0.0, |
| "mean": 0.0, |
| "std": 0.0, |
| "sparsity": 1.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.11.2.to_v.bias": { |
| "min": 0.0, |
| "max": 0.0, |
| "mean": 0.0, |
| "std": 0.0, |
| "sparsity": 1.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.11.2.to_out.0.weight": { |
| "min": 0.0, |
| "max": 0.0, |
| "mean": 0.0, |
| "std": 0.0, |
| "sparsity": 1.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.11.2.to_out.0.bias": { |
| "min": -0.00039401825051754713, |
| "max": 0.00042413949267938733, |
| "mean": 2.811485501297284e-06, |
| "std": 0.00013175072672311217, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.11.3.g": { |
| "min": 0.9984285831451416, |
| "max": 1.0057381391525269, |
| "mean": 1.0001252889633179, |
| "std": 0.0012227989500388503, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.11.4.ff.0.0.weight": { |
| "min": -0.03248310461640358, |
| "max": 0.03276699408888817, |
| "mean": -6.534818567160983e-06, |
| "std": 0.01804283820092678, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.11.4.ff.0.0.bias": { |
| "min": -0.03245115652680397, |
| "max": 0.032321732491254807, |
| "mean": -6.833355291746557e-05, |
| "std": 0.017962154000997543, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.11.4.ff.2.weight": { |
| "min": -0.001046429155394435, |
| "max": 0.001021245145238936, |
| "mean": 1.2730889693557401e-06, |
| "std": 0.00019014839199371636, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.11.4.ff.2.bias": { |
| "min": -0.00038878852501511574, |
| "max": 0.0004429140826687217, |
| "mean": 4.41432621300919e-06, |
| "std": 0.00012222054647281766, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.12.1.g": { |
| "min": 0.3831113874912262, |
| "max": 0.7217056155204773, |
| "mean": 0.5806930065155029, |
| "std": 0.03891616314649582, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.12.2.to_q.weight": { |
| "min": -0.23930218815803528, |
| "max": 0.19694408774375916, |
| "mean": 2.6163981601712294e-05, |
| "std": 0.03746587410569191, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.12.2.to_q.bias": { |
| "min": -0.11892960965633392, |
| "max": 0.16658687591552734, |
| "mean": 0.0009876482654362917, |
| "std": 0.027559131383895874, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.12.2.to_k.weight": { |
| "min": -0.2469177097082138, |
| "max": 0.5011630058288574, |
| "mean": -5.039005191065371e-05, |
| "std": 0.037623330950737, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.12.2.to_k.bias": { |
| "min": -3.9455182552337646, |
| "max": 3.7725064754486084, |
| "mean": -0.003572634421288967, |
| "std": 0.6815741658210754, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.12.2.to_v.weight": { |
| "min": -0.2276747226715088, |
| "max": 0.25224873423576355, |
| "mean": -1.156590678874636e-05, |
| "std": 0.03743501380085945, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.12.2.to_v.bias": { |
| "min": -0.0717209130525589, |
| "max": 0.08072538673877716, |
| "mean": -0.0005185012123547494, |
| "std": 0.01566058024764061, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.12.2.to_out.0.weight": { |
| "min": -0.2281697541475296, |
| "max": 0.25840428471565247, |
| "mean": -2.8510152333183214e-05, |
| "std": 0.03542180359363556, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.12.2.to_out.0.bias": { |
| "min": -0.2006748765707016, |
| "max": 0.21532072126865387, |
| "mean": -0.005526356864720583, |
| "std": 0.06832510232925415, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.12.3.g": { |
| "min": 0.4052578806877136, |
| "max": 1.1931043863296509, |
| "mean": 0.7380141019821167, |
| "std": 0.05553331598639488, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.12.4.ff.0.0.weight": { |
| "min": -0.2216469943523407, |
| "max": 0.24624952673912048, |
| "mean": 0.0005209938390180469, |
| "std": 0.04133738949894905, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.12.4.ff.0.0.bias": { |
| "min": -0.10345429182052612, |
| "max": 0.024157993495464325, |
| "mean": -0.03266732394695282, |
| "std": 0.018895410001277924, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.12.4.ff.2.weight": { |
| "min": -0.4506717622280121, |
| "max": 0.4234609603881836, |
| "mean": -0.00043505526264198124, |
| "std": 0.04689793288707733, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.12.4.ff.2.bias": { |
| "min": -0.2517058551311493, |
| "max": 0.4705328345298767, |
| "mean": 0.0032054544426500797, |
| "std": 0.044538334012031555, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.13.0.weight": { |
| "min": -0.31723225116729736, |
| "max": 0.3334876596927643, |
| "mean": -2.5067403839784674e-05, |
| "std": 0.021288011223077774, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 2048 |
| ] |
| }, |
| "transformer.layers.13.1.g": { |
| "min": 0.32461482286453247, |
| "max": 0.6871254444122314, |
| "mean": 0.5709946155548096, |
| "std": 0.044712185859680176, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.13.2.to_q.weight": { |
| "min": -0.16488447785377502, |
| "max": 0.174674391746521, |
| "mean": -4.878301842836663e-05, |
| "std": 0.033181823790073395, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.13.2.to_q.bias": { |
| "min": -0.18708285689353943, |
| "max": 0.14329394698143005, |
| "mean": 4.1025952668860555e-05, |
| "std": 0.02970319241285324, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.13.2.to_k.weight": { |
| "min": -0.3814561367034912, |
| "max": 0.2463892698287964, |
| "mean": -9.789278919924982e-06, |
| "std": 0.03276311233639717, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.13.2.to_k.bias": { |
| "min": -3.6606388092041016, |
| "max": 3.2944271564483643, |
| "mean": -0.01427321694791317, |
| "std": 0.9851539731025696, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.13.2.to_v.weight": { |
| "min": -0.23539957404136658, |
| "max": 0.2480521947145462, |
| "mean": -1.7979342374019325e-05, |
| "std": 0.04169878736138344, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.13.2.to_v.bias": { |
| "min": -0.07279200851917267, |
| "max": 0.15470217168331146, |
| "mean": 0.0006656068144366145, |
| "std": 0.02517576329410076, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.13.2.to_out.0.weight": { |
| "min": -0.2668735086917877, |
| "max": 0.2486240267753601, |
| "mean": -1.5421055650222115e-05, |
| "std": 0.04013972356915474, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.13.2.to_out.0.bias": { |
| "min": -0.18993628025054932, |
| "max": 0.19500213861465454, |
| "mean": -0.0012349991593509912, |
| "std": 0.06668674200773239, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.13.3.g": { |
| "min": 0.32912659645080566, |
| "max": 1.003253698348999, |
| "mean": 0.7192496061325073, |
| "std": 0.052594345062971115, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.13.4.ff.0.0.weight": { |
| "min": -0.2322535365819931, |
| "max": 0.24589639902114868, |
| "mean": 0.00018273374007549137, |
| "std": 0.0409013107419014, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.13.4.ff.0.0.bias": { |
| "min": -0.11447025835514069, |
| "max": 0.018959810957312584, |
| "mean": -0.04247897118330002, |
| "std": 0.018857870250940323, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.13.4.ff.2.weight": { |
| "min": -0.39094480872154236, |
| "max": 0.4085846245288849, |
| "mean": -2.156081063731108e-05, |
| "std": 0.0485350526869297, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.13.4.ff.2.bias": { |
| "min": -0.6941088438034058, |
| "max": 0.413074254989624, |
| "mean": 0.0008494330104440451, |
| "std": 0.060315798968076706, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.14.0.weight": { |
| "min": -0.0010608690790832043, |
| "max": 1.0004838705062866, |
| "mean": 0.0004881545901298523, |
| "std": 0.0220896415412426, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 2048 |
| ] |
| }, |
| "transformer.layers.14.1.g": { |
| "min": 0.9995023608207703, |
| "max": 1.004894495010376, |
| "mean": 1.0006191730499268, |
| "std": 0.0017806595424190164, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.14.2.to_q.weight": { |
| "min": -0.031253229826688766, |
| "max": 0.0312533862888813, |
| "mean": -2.1022129658376798e-05, |
| "std": 0.018033137544989586, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.14.2.to_q.bias": { |
| "min": -0.03121466003358364, |
| "max": 0.031230736523866653, |
| "mean": -0.0006770135369151831, |
| "std": 0.017827749252319336, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.14.2.to_k.weight": { |
| "min": -0.031253378838300705, |
| "max": 0.03125477209687233, |
| "mean": -8.833090760163032e-06, |
| "std": 0.018032172694802284, |
| "sparsity": 9.5367431640625e-07, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.14.2.to_k.bias": { |
| "min": -0.031231535598635674, |
| "max": 0.031244806945323944, |
| "mean": -0.0007297678967006505, |
| "std": 0.01794254779815674, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.14.2.to_v.weight": { |
| "min": 0.0, |
| "max": 0.0, |
| "mean": 0.0, |
| "std": 0.0, |
| "sparsity": 1.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.14.2.to_v.bias": { |
| "min": 0.0, |
| "max": 0.0, |
| "mean": 0.0, |
| "std": 0.0, |
| "sparsity": 1.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.14.2.to_out.0.weight": { |
| "min": 0.0, |
| "max": 0.0, |
| "mean": 0.0, |
| "std": 0.0, |
| "sparsity": 1.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.14.2.to_out.0.bias": { |
| "min": -0.00039897009264677763, |
| "max": 0.00031239030067808926, |
| "mean": -2.7656624297378585e-06, |
| "std": 0.00010500323696760461, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.14.3.g": { |
| "min": 0.9984675645828247, |
| "max": 1.005997896194458, |
| "mean": 0.9998568296432495, |
| "std": 0.0012546924408525229, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.14.4.ff.0.0.weight": { |
| "min": -0.032396964728832245, |
| "max": 0.032092805951833725, |
| "mean": -3.513969204504974e-08, |
| "std": 0.018030446022748947, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.14.4.ff.0.0.bias": { |
| "min": -0.03191046044230461, |
| "max": 0.03107621893286705, |
| "mean": -0.00026303951744921505, |
| "std": 0.018048185855150223, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.14.4.ff.2.weight": { |
| "min": -0.0011175514664500952, |
| "max": 0.0010112477466464043, |
| "mean": -6.1762216319039e-07, |
| "std": 0.0001866686943685636, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.14.4.ff.2.bias": { |
| "min": -0.0003427659103181213, |
| "max": 0.00032113981433212757, |
| "mean": -2.040310619122465e-06, |
| "std": 9.538298763800412e-05, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.15.0.weight": { |
| "min": -0.23462186753749847, |
| "max": 0.27271148562431335, |
| "mean": 6.776777354389196e-06, |
| "std": 0.018810205161571503, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 2048 |
| ] |
| }, |
| "transformer.layers.15.1.g": { |
| "min": 0.32134121656417847, |
| "max": 0.696171224117279, |
| "mean": 0.5816354155540466, |
| "std": 0.045965857803821564, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.15.2.to_q.weight": { |
| "min": -0.18210144340991974, |
| "max": 0.19822537899017334, |
| "mean": -1.1569689377211034e-05, |
| "std": 0.03318428248167038, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.15.2.to_q.bias": { |
| "min": -0.16075287759304047, |
| "max": 0.1296185702085495, |
| "mean": -0.0010708055924624205, |
| "std": 0.03414905443787575, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.15.2.to_k.weight": { |
| "min": -0.33257541060447693, |
| "max": 0.31164395809173584, |
| "mean": -1.0188834494329058e-05, |
| "std": 0.03223486989736557, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.15.2.to_k.bias": { |
| "min": -7.815314769744873, |
| "max": 8.776156425476074, |
| "mean": 0.09355179965496063, |
| "std": 1.6212124824523926, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.15.2.to_v.weight": { |
| "min": -0.2341691255569458, |
| "max": 0.2423291653394699, |
| "mean": 4.1637467802502215e-05, |
| "std": 0.040857378393411636, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.15.2.to_v.bias": { |
| "min": -0.0760289877653122, |
| "max": 0.065830759704113, |
| "mean": 0.00048469315515831113, |
| "std": 0.019415758550167084, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.15.2.to_out.0.weight": { |
| "min": -0.24639879167079926, |
| "max": 0.23466575145721436, |
| "mean": -3.0853516364004463e-06, |
| "std": 0.03943203389644623, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.15.2.to_out.0.bias": { |
| "min": -0.16285879909992218, |
| "max": 0.16076169908046722, |
| "mean": 0.0016295814421027899, |
| "std": 0.0652732104063034, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.15.3.g": { |
| "min": 0.5568758845329285, |
| "max": 0.9466937184333801, |
| "mean": 0.7129064202308655, |
| "std": 0.0403011329472065, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.15.4.ff.0.0.weight": { |
| "min": -0.22882379591464996, |
| "max": 0.25551655888557434, |
| "mean": -4.5426822907757014e-05, |
| "std": 0.0405760332942009, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.15.4.ff.0.0.bias": { |
| "min": -0.1351136714220047, |
| "max": 0.022313008084893227, |
| "mean": -0.04135293886065483, |
| "std": 0.01838735118508339, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.15.4.ff.2.weight": { |
| "min": -0.4227588474750519, |
| "max": 0.3930455446243286, |
| "mean": -4.085732143721543e-06, |
| "std": 0.047785546630620956, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.15.4.ff.2.bias": { |
| "min": -0.6080650687217712, |
| "max": 0.6521760821342468, |
| "mean": 0.0015855736564844847, |
| "std": 0.05685455724596977, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.16.0.weight": { |
| "min": -0.2519088387489319, |
| "max": 0.3208920359611511, |
| "mean": -6.068687071092427e-06, |
| "std": 0.01961320824921131, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 2048 |
| ] |
| }, |
| "transformer.layers.16.1.g": { |
| "min": 0.3596932888031006, |
| "max": 0.6842364072799683, |
| "mean": 0.5706857442855835, |
| "std": 0.042946916073560715, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.16.2.to_q.weight": { |
| "min": -0.22081606090068817, |
| "max": 0.1773088276386261, |
| "mean": -3.454893158050254e-05, |
| "std": 0.03429890051484108, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.16.2.to_q.bias": { |
| "min": -0.1636391431093216, |
| "max": 0.23335042595863342, |
| "mean": 0.00035607549943961203, |
| "std": 0.032843589782714844, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.16.2.to_k.weight": { |
| "min": -0.26433637738227844, |
| "max": 0.24021653831005096, |
| "mean": -5.268204404274002e-05, |
| "std": 0.033897630870342255, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.16.2.to_k.bias": { |
| "min": -4.859472751617432, |
| "max": 5.095940113067627, |
| "mean": 0.043871667236089706, |
| "std": 1.2294032573699951, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.16.2.to_v.weight": { |
| "min": -0.24689450860023499, |
| "max": 0.2507416307926178, |
| "mean": 7.20950702088885e-05, |
| "std": 0.04398806765675545, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.16.2.to_v.bias": { |
| "min": -0.062653087079525, |
| "max": 0.05465509742498398, |
| "mean": 0.0006480686133727431, |
| "std": 0.01719220168888569, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.16.2.to_out.0.weight": { |
| "min": -0.287101686000824, |
| "max": 0.27245277166366577, |
| "mean": -5.0120852392865345e-05, |
| "std": 0.04298638179898262, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.16.2.to_out.0.bias": { |
| "min": -0.16084662079811096, |
| "max": 0.17058779299259186, |
| "mean": -0.002887619426473975, |
| "std": 0.05928964540362358, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.16.3.g": { |
| "min": 0.5198022723197937, |
| "max": 0.9352366328239441, |
| "mean": 0.7134757041931152, |
| "std": 0.03851567581295967, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.16.4.ff.0.0.weight": { |
| "min": -0.23833467066287994, |
| "max": 0.24947485327720642, |
| "mean": 0.0004647623864002526, |
| "std": 0.040455412119627, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.16.4.ff.0.0.bias": { |
| "min": -0.1449345052242279, |
| "max": 0.041161470115184784, |
| "mean": -0.039693716913461685, |
| "std": 0.020549351349473, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.16.4.ff.2.weight": { |
| "min": -0.5341992378234863, |
| "max": 0.584149181842804, |
| "mean": 5.933919965173118e-06, |
| "std": 0.048861313611269, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.16.4.ff.2.bias": { |
| "min": -0.5195870399475098, |
| "max": 0.4941606819629669, |
| "mean": 0.0023631826043128967, |
| "std": 0.05346201732754707, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.17.0.weight": { |
| "min": -0.27384015917778015, |
| "max": 0.3156191408634186, |
| "mean": 1.960434929060284e-06, |
| "std": 0.020050089806318283, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 2048 |
| ] |
| }, |
| "transformer.layers.17.1.g": { |
| "min": 0.3661290407180786, |
| "max": 0.7137707471847534, |
| "mean": 0.5931426286697388, |
| "std": 0.045923035591840744, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.17.2.to_q.weight": { |
| "min": -0.21142390370368958, |
| "max": 0.1996057629585266, |
| "mean": 3.067640500376001e-05, |
| "std": 0.034866977483034134, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.17.2.to_q.bias": { |
| "min": -0.18729116022586823, |
| "max": 0.20393171906471252, |
| "mean": 0.0009568152017891407, |
| "std": 0.031525619328022, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.17.2.to_k.weight": { |
| "min": -0.29027533531188965, |
| "max": 0.34051838517189026, |
| "mean": -4.7230056225089356e-05, |
| "std": 0.03458789736032486, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.17.2.to_k.bias": { |
| "min": -3.881865978240967, |
| "max": 3.3913497924804688, |
| "mean": 0.014454022981226444, |
| "std": 0.8585575819015503, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.17.2.to_v.weight": { |
| "min": -0.22494949400424957, |
| "max": 0.25041675567626953, |
| "mean": -3.845839273708407e-06, |
| "std": 0.0422312431037426, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.17.2.to_v.bias": { |
| "min": -0.055274393409490585, |
| "max": 0.04683299362659454, |
| "mean": -1.701708242762834e-05, |
| "std": 0.015851490199565887, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.17.2.to_out.0.weight": { |
| "min": -0.29334571957588196, |
| "max": 0.2907007336616516, |
| "mean": -7.57977295506862e-06, |
| "std": 0.04194618761539459, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.17.2.to_out.0.bias": { |
| "min": -0.1247822642326355, |
| "max": 0.2594626247882843, |
| "mean": -0.0032404293306171894, |
| "std": 0.0531664676964283, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.17.3.g": { |
| "min": 0.4562881588935852, |
| "max": 0.8474717736244202, |
| "mean": 0.7055672407150269, |
| "std": 0.035394009202718735, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.17.4.ff.0.0.weight": { |
| "min": -0.5121109485626221, |
| "max": 0.34823864698410034, |
| "mean": 0.0003428200143389404, |
| "std": 0.04020027443766594, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.17.4.ff.0.0.bias": { |
| "min": -0.1863405406475067, |
| "max": 0.039554521441459656, |
| "mean": -0.03938986361026764, |
| "std": 0.02135385014116764, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.17.4.ff.2.weight": { |
| "min": -0.5456476807594299, |
| "max": 0.5576444864273071, |
| "mean": -7.10671374690719e-05, |
| "std": 0.050736188888549805, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.17.4.ff.2.bias": { |
| "min": -0.5122882723808289, |
| "max": 0.6650155782699585, |
| "mean": 0.0024437594693154097, |
| "std": 0.049542441964149475, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.18.0.weight": { |
| "min": -0.3326261341571808, |
| "max": 0.26606664061546326, |
| "mean": 3.3996070669672918e-06, |
| "std": 0.01938733644783497, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 2048 |
| ] |
| }, |
| "transformer.layers.18.1.g": { |
| "min": 0.32209691405296326, |
| "max": 0.7689979672431946, |
| "mean": 0.651018500328064, |
| "std": 0.045278150588274, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.18.2.to_q.weight": { |
| "min": -0.25021034479141235, |
| "max": 0.22022569179534912, |
| "mean": -2.263453097839374e-06, |
| "std": 0.0365014486014843, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.18.2.to_q.bias": { |
| "min": -0.32728204131126404, |
| "max": 0.28722772002220154, |
| "mean": -0.0006871280493214726, |
| "std": 0.038576990365982056, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.18.2.to_k.weight": { |
| "min": -0.3110663890838623, |
| "max": 0.37101635336875916, |
| "mean": 6.483237666543573e-05, |
| "std": 0.03624214604496956, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.18.2.to_k.bias": { |
| "min": -4.721696376800537, |
| "max": 5.813023090362549, |
| "mean": 0.037980761379003525, |
| "std": 1.4134187698364258, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.18.2.to_v.weight": { |
| "min": -0.22233453392982483, |
| "max": 0.20630262792110443, |
| "mean": -7.52985361032188e-05, |
| "std": 0.0424862764775753, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.18.2.to_v.bias": { |
| "min": -0.07775042951107025, |
| "max": 0.051466166973114014, |
| "mean": -0.0009254277683794498, |
| "std": 0.0164100993424654, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.18.2.to_out.0.weight": { |
| "min": -0.3309888541698456, |
| "max": 0.3296257257461548, |
| "mean": -4.630289367923979e-06, |
| "std": 0.04279271885752678, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.18.2.to_out.0.bias": { |
| "min": -0.2851186692714691, |
| "max": 0.11168244481086731, |
| "mean": -0.0012053586542606354, |
| "std": 0.04700839892029762, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.18.3.g": { |
| "min": 0.4862569272518158, |
| "max": 0.8893836140632629, |
| "mean": 0.7374457716941833, |
| "std": 0.03831757605075836, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.18.4.ff.0.0.weight": { |
| "min": -0.3624440133571625, |
| "max": 0.27509352564811707, |
| "mean": 5.130700083100237e-05, |
| "std": 0.040646348148584366, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.18.4.ff.0.0.bias": { |
| "min": -0.24782374501228333, |
| "max": 0.04648653045296669, |
| "mean": -0.0392659492790699, |
| "std": 0.023277943953871727, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.18.4.ff.2.weight": { |
| "min": -0.6279041171073914, |
| "max": 0.5983599424362183, |
| "mean": -6.208260310813785e-05, |
| "std": 0.05311836674809456, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.18.4.ff.2.bias": { |
| "min": -0.7105586528778076, |
| "max": 0.266210675239563, |
| "mean": 0.0009207880357280374, |
| "std": 0.05124485120177269, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.19.0.weight": { |
| "min": -0.3435235619544983, |
| "max": 0.30372199416160583, |
| "mean": 2.971426056319615e-07, |
| "std": 0.019135644659399986, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 2048 |
| ] |
| }, |
| "transformer.layers.19.1.g": { |
| "min": 0.34978553652763367, |
| "max": 0.7852374911308289, |
| "mean": 0.6388005018234253, |
| "std": 0.04921075701713562, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.19.2.to_q.weight": { |
| "min": -0.20607401430606842, |
| "max": 0.20750851929187775, |
| "mean": -5.96779900661204e-05, |
| "std": 0.037695422768592834, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.19.2.to_q.bias": { |
| "min": -0.2588743567466736, |
| "max": 0.2684256136417389, |
| "mean": -0.00040556711610406637, |
| "std": 0.04462844133377075, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.19.2.to_k.weight": { |
| "min": -0.3547278344631195, |
| "max": 0.32300710678100586, |
| "mean": -6.988519089645706e-06, |
| "std": 0.03720381483435631, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.19.2.to_k.bias": { |
| "min": -5.265876293182373, |
| "max": 4.207967281341553, |
| "mean": -0.026429325342178345, |
| "std": 1.0068732500076294, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.19.2.to_v.weight": { |
| "min": -0.2394271194934845, |
| "max": 0.24428503215312958, |
| "mean": -2.5281191483372822e-05, |
| "std": 0.04321092739701271, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.19.2.to_v.bias": { |
| "min": -0.06252460181713104, |
| "max": 0.056893154978752136, |
| "mean": 0.000347302237059921, |
| "std": 0.014152363874018192, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.19.2.to_out.0.weight": { |
| "min": -0.4372415244579315, |
| "max": 0.3737826347351074, |
| "mean": 1.467342644900782e-05, |
| "std": 0.04412253573536873, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.19.2.to_out.0.bias": { |
| "min": -0.09628994017839432, |
| "max": 0.17628277838230133, |
| "mean": -0.0006604281952604651, |
| "std": 0.03514600917696953, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.19.3.g": { |
| "min": 0.4217767119407654, |
| "max": 1.0722668170928955, |
| "mean": 0.7484005689620972, |
| "std": 0.04209807515144348, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.19.4.ff.0.0.weight": { |
| "min": -0.2667092978954315, |
| "max": 0.2975556254386902, |
| "mean": -7.937644113553688e-05, |
| "std": 0.04080634191632271, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.19.4.ff.0.0.bias": { |
| "min": -0.1854698657989502, |
| "max": 0.04349794238805771, |
| "mean": -0.03681644797325134, |
| "std": 0.02560725063085556, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.19.4.ff.2.weight": { |
| "min": -0.4579220414161682, |
| "max": 0.48784998059272766, |
| "mean": 4.282052395865321e-05, |
| "std": 0.05421200394630432, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.19.4.ff.2.bias": { |
| "min": -0.2866349518299103, |
| "max": 0.5520289540290833, |
| "mean": -0.0008793525630608201, |
| "std": 0.04783879220485687, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.20.0.weight": { |
| "min": -0.29281285405158997, |
| "max": 0.32289794087409973, |
| "mean": 6.245412805583328e-06, |
| "std": 0.019969133660197258, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 2048 |
| ] |
| }, |
| "transformer.layers.20.1.g": { |
| "min": 0.29108351469039917, |
| "max": 0.7621498107910156, |
| "mean": 0.6508013010025024, |
| "std": 0.05207887664437294, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.20.2.to_q.weight": { |
| "min": -0.2440386265516281, |
| "max": 0.2621654272079468, |
| "mean": -5.880815479031298e-06, |
| "std": 0.03961231932044029, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.20.2.to_q.bias": { |
| "min": -0.2678271532058716, |
| "max": 0.2002498358488083, |
| "mean": -0.0008784097735770047, |
| "std": 0.05178229510784149, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.20.2.to_k.weight": { |
| "min": -0.27257686853408813, |
| "max": 0.2541964650154114, |
| "mean": 4.526807060756255e-06, |
| "std": 0.038709431886672974, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.20.2.to_k.bias": { |
| "min": -12.982023239135742, |
| "max": 15.968067169189453, |
| "mean": 0.03324813023209572, |
| "std": 1.9908379316329956, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.20.2.to_v.weight": { |
| "min": -0.2077104151248932, |
| "max": 0.22651426494121552, |
| "mean": -7.221860869321972e-05, |
| "std": 0.040554750710725784, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.20.2.to_v.bias": { |
| "min": -0.06946562975645065, |
| "max": 0.06337178498506546, |
| "mean": 0.00015520014858338982, |
| "std": 0.01475033164024353, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.20.2.to_out.0.weight": { |
| "min": -0.46565988659858704, |
| "max": 0.3208334743976593, |
| "mean": 1.9561422959668562e-05, |
| "std": 0.040589939802885056, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.20.2.to_out.0.bias": { |
| "min": -0.064049631357193, |
| "max": 0.11550958454608917, |
| "mean": 0.0011937393574044108, |
| "std": 0.02470548450946808, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.20.3.g": { |
| "min": 0.3747756779193878, |
| "max": 0.9347750544548035, |
| "mean": 0.7509442567825317, |
| "std": 0.04021797329187393, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.20.4.ff.0.0.weight": { |
| "min": -0.2801269292831421, |
| "max": 0.27387121319770813, |
| "mean": -0.00016841593605931848, |
| "std": 0.040997058153152466, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.20.4.ff.0.0.bias": { |
| "min": -0.19878797233104706, |
| "max": 0.05111948773264885, |
| "mean": -0.032027605921030045, |
| "std": 0.025102604180574417, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.20.4.ff.2.weight": { |
| "min": -0.6596145033836365, |
| "max": 0.537032425403595, |
| "mean": -4.937778794555925e-05, |
| "std": 0.05284846946597099, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.20.4.ff.2.bias": { |
| "min": -0.1930496245622635, |
| "max": 0.5826522707939148, |
| "mean": -0.0005124770104885101, |
| "std": 0.04108353331685066, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.21.0.weight": { |
| "min": -0.41787075996398926, |
| "max": 0.37214192748069763, |
| "mean": 6.244237738428637e-06, |
| "std": 0.021621638908982277, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 2048 |
| ] |
| }, |
| "transformer.layers.21.1.g": { |
| "min": 0.21441777050495148, |
| "max": 0.7472008466720581, |
| "mean": 0.6494799852371216, |
| "std": 0.05431411787867546, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.21.2.to_q.weight": { |
| "min": -0.20989972352981567, |
| "max": 0.19592680037021637, |
| "mean": 4.0151899156626314e-05, |
| "std": 0.039461154490709305, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.21.2.to_q.bias": { |
| "min": -0.329771488904953, |
| "max": 0.25982508063316345, |
| "mean": -0.003228080226108432, |
| "std": 0.056280527263879776, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.21.2.to_k.weight": { |
| "min": -0.2062487006187439, |
| "max": 0.2551846504211426, |
| "mean": 5.400779264164157e-05, |
| "std": 0.038563843816518784, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.21.2.to_k.bias": { |
| "min": -6.2493767738342285, |
| "max": 6.938913345336914, |
| "mean": 0.04840244725346565, |
| "std": 1.3855851888656616, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.21.2.to_v.weight": { |
| "min": -0.21009960770606995, |
| "max": 0.23065192997455597, |
| "mean": -5.2159043661959e-06, |
| "std": 0.041313353925943375, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.21.2.to_v.bias": { |
| "min": -0.0439465157687664, |
| "max": 0.03601067140698433, |
| "mean": -2.0584266167134047e-06, |
| "std": 0.012799846939742565, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.21.2.to_out.0.weight": { |
| "min": -0.39804428815841675, |
| "max": 0.34499886631965637, |
| "mean": -5.5499749578302726e-05, |
| "std": 0.04238968715071678, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.21.2.to_out.0.bias": { |
| "min": -0.055174216628074646, |
| "max": 0.06293413788080215, |
| "mean": 0.00036305427784100175, |
| "std": 0.01867016963660717, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.21.3.g": { |
| "min": 0.3503042459487915, |
| "max": 1.0480320453643799, |
| "mean": 0.7894532084465027, |
| "std": 0.048786185681819916, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.21.4.ff.0.0.weight": { |
| "min": -0.3337661623954773, |
| "max": 0.3864375650882721, |
| "mean": -0.00016956219042185694, |
| "std": 0.04148184508085251, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.21.4.ff.0.0.bias": { |
| "min": -0.15768638253211975, |
| "max": 0.05907022953033447, |
| "mean": -0.031832221895456314, |
| "std": 0.0251291636377573, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.21.4.ff.2.weight": { |
| "min": -0.6973653435707092, |
| "max": 0.47017383575439453, |
| "mean": -8.81649466464296e-05, |
| "std": 0.051795393228530884, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.21.4.ff.2.bias": { |
| "min": -0.24848268926143646, |
| "max": 0.32916560769081116, |
| "mean": -0.0002544308081269264, |
| "std": 0.041454534977674484, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.22.0.weight": { |
| "min": -0.2872900664806366, |
| "max": 0.3505076766014099, |
| "mean": -2.3586867428093683e-06, |
| "std": 0.024236176162958145, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 2048 |
| ] |
| }, |
| "transformer.layers.22.1.g": { |
| "min": 0.19670914113521576, |
| "max": 0.7788708806037903, |
| "mean": 0.6702359914779663, |
| "std": 0.05864134803414345, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.22.2.to_q.weight": { |
| "min": -0.2293103188276291, |
| "max": 0.23172836005687714, |
| "mean": -2.0263662008801475e-05, |
| "std": 0.04043755307793617, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.22.2.to_q.bias": { |
| "min": -0.2201755940914154, |
| "max": 0.2412194311618805, |
| "mean": 0.0007778588915243745, |
| "std": 0.05583813413977623, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.22.2.to_k.weight": { |
| "min": -0.21645531058311462, |
| "max": 0.2269156575202942, |
| "mean": -7.186527363955975e-05, |
| "std": 0.03937343880534172, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.22.2.to_k.bias": { |
| "min": -8.91368579864502, |
| "max": 9.076720237731934, |
| "mean": -0.0012592850252985954, |
| "std": 1.8490537405014038, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.22.2.to_v.weight": { |
| "min": -0.2699006199836731, |
| "max": 0.2594479024410248, |
| "mean": 4.3596926843747497e-05, |
| "std": 0.03840681165456772, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.22.2.to_v.bias": { |
| "min": -0.05783012881875038, |
| "max": 0.057821568101644516, |
| "mean": 0.0003521823091432452, |
| "std": 0.014716818928718567, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.22.2.to_out.0.weight": { |
| "min": -0.26518943905830383, |
| "max": 0.2887333035469055, |
| "mean": -6.169862172100693e-05, |
| "std": 0.03907295688986778, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.22.2.to_out.0.bias": { |
| "min": -0.04396004229784012, |
| "max": 0.037220947444438934, |
| "mean": -9.395174856763333e-05, |
| "std": 0.013354334980249405, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.22.3.g": { |
| "min": 0.33940210938453674, |
| "max": 1.0958820581436157, |
| "mean": 0.8637964129447937, |
| "std": 0.06389264762401581, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.22.4.ff.0.0.weight": { |
| "min": -0.4235352873802185, |
| "max": 0.41927266120910645, |
| "mean": 0.000313018070301041, |
| "std": 0.04350249841809273, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.22.4.ff.0.0.bias": { |
| "min": -0.21509824693202972, |
| "max": 0.17092689871788025, |
| "mean": -0.0294746495783329, |
| "std": 0.03193298354744911, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.22.4.ff.2.weight": { |
| "min": -0.6005915999412537, |
| "max": 0.5609812140464783, |
| "mean": -0.00015016092220321298, |
| "std": 0.05344870314002037, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.22.4.ff.2.bias": { |
| "min": -0.17891772091388702, |
| "max": 0.3774968683719635, |
| "mean": 0.0013590974267572165, |
| "std": 0.03732309862971306, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.23.0.weight": { |
| "min": -0.39461401104927063, |
| "max": 0.36924391984939575, |
| "mean": 3.7040204915683717e-05, |
| "std": 0.028616365045309067, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 2048 |
| ] |
| }, |
| "transformer.layers.23.1.g": { |
| "min": 0.29045382142066956, |
| "max": 0.8264784812927246, |
| "mean": 0.7055213451385498, |
| "std": 0.0678410679101944, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.23.2.to_q.weight": { |
| "min": -0.9263020753860474, |
| "max": 1.0267603397369385, |
| "mean": -2.6431953301653266e-05, |
| "std": 0.04762791842222214, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.23.2.to_q.bias": { |
| "min": -0.8796241879463196, |
| "max": 0.8164305686950684, |
| "mean": -0.0003041320014744997, |
| "std": 0.0956113338470459, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.23.2.to_k.weight": { |
| "min": -0.27020347118377686, |
| "max": 0.241440087556839, |
| "mean": -2.271639823447913e-05, |
| "std": 0.038950297981500626, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.23.2.to_k.bias": { |
| "min": -23.76431655883789, |
| "max": 22.871889114379883, |
| "mean": -0.09189724177122116, |
| "std": 4.073054313659668, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.23.2.to_v.weight": { |
| "min": -0.22821645438671112, |
| "max": 0.24578580260276794, |
| "mean": -2.5681954866740853e-05, |
| "std": 0.03863786533474922, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.23.2.to_v.bias": { |
| "min": -0.06044214218854904, |
| "max": 0.04586166515946388, |
| "mean": -0.00014234766422305256, |
| "std": 0.014693022705614567, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.23.2.to_out.0.weight": { |
| "min": -0.3386403024196625, |
| "max": 0.3753957748413086, |
| "mean": 7.493808880099095e-06, |
| "std": 0.04081406444311142, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.23.2.to_out.0.bias": { |
| "min": -0.04647579416632652, |
| "max": 0.19592434167861938, |
| "mean": 0.00027245082310400903, |
| "std": 0.01356989610940218, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.23.3.g": { |
| "min": 0.3743247389793396, |
| "max": 1.133009910583496, |
| "mean": 0.8900730609893799, |
| "std": 0.06399820744991302, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.23.4.ff.0.0.weight": { |
| "min": -0.44806551933288574, |
| "max": 0.5433648824691772, |
| "mean": 2.4754037440288812e-05, |
| "std": 0.04556819051504135, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.23.4.ff.0.0.bias": { |
| "min": -0.22422385215759277, |
| "max": 0.08793910592794418, |
| "mean": -0.03202162683010101, |
| "std": 0.03776844963431358, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.23.4.ff.2.weight": { |
| "min": -0.7274155616760254, |
| "max": 0.6907259225845337, |
| "mean": 3.4943295759148896e-05, |
| "std": 0.05178087204694748, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.23.4.ff.2.bias": { |
| "min": -0.17463494837284088, |
| "max": 0.2185920923948288, |
| "mean": 3.897436545230448e-05, |
| "std": 0.031783733516931534, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.24.0.weight": { |
| "min": -0.34052687883377075, |
| "max": 0.37423866987228394, |
| "mean": 4.304847971070558e-05, |
| "std": 0.034138280898332596, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 2048 |
| ] |
| }, |
| "transformer.layers.24.1.g": { |
| "min": 0.3175727128982544, |
| "max": 1.290410041809082, |
| "mean": 0.6015003323554993, |
| "std": 0.08363870531320572, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.24.2.to_q.weight": { |
| "min": -0.28354600071907043, |
| "max": 0.260841429233551, |
| "mean": -3.130652658001054e-06, |
| "std": 0.035979557782411575, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.24.2.to_q.bias": { |
| "min": -0.23592722415924072, |
| "max": 0.2057497352361679, |
| "mean": 0.00023727506049908698, |
| "std": 0.056021153926849365, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.24.2.to_k.weight": { |
| "min": -0.43595167994499207, |
| "max": 0.32549113035202026, |
| "mean": 2.434775342408102e-05, |
| "std": 0.034129101783037186, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.24.2.to_k.bias": { |
| "min": -5.553627967834473, |
| "max": 7.324089527130127, |
| "mean": -0.007399275898933411, |
| "std": 0.7001854181289673, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.24.2.to_v.weight": { |
| "min": -0.34464672207832336, |
| "max": 0.3639456331729889, |
| "mean": 0.0001033150329021737, |
| "std": 0.047829318791627884, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.24.2.to_v.bias": { |
| "min": -0.0738968476653099, |
| "max": 0.060446880757808685, |
| "mean": 0.0009350795298814774, |
| "std": 0.014948361553251743, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.24.2.to_out.0.weight": { |
| "min": -0.2562582790851593, |
| "max": 0.28724488615989685, |
| "mean": 4.657229510485195e-06, |
| "std": 0.0415559858083725, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.24.2.to_out.0.bias": { |
| "min": -0.05538095533847809, |
| "max": 0.06288731843233109, |
| "mean": 0.00013551797019317746, |
| "std": 0.007167231757193804, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.24.3.g": { |
| "min": 0.4939861297607422, |
| "max": 1.2202398777008057, |
| "mean": 1.013412356376648, |
| "std": 0.1173911765217781, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.24.4.ff.0.0.weight": { |
| "min": -1.0939209461212158, |
| "max": 1.0473735332489014, |
| "mean": -4.927456029690802e-05, |
| "std": 0.05241009593009949, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.24.4.ff.0.0.bias": { |
| "min": -0.22382217645645142, |
| "max": 0.1730560064315796, |
| "mean": -0.027248641476035118, |
| "std": 0.03636055067181587, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.24.4.ff.2.weight": { |
| "min": -0.8865154385566711, |
| "max": 0.9247081279754639, |
| "mean": -0.00014585975441150367, |
| "std": 0.0532848984003067, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.24.4.ff.2.bias": { |
| "min": -0.17122139036655426, |
| "max": 0.38014623522758484, |
| "mean": 0.0033699313644319773, |
| "std": 0.03990361467003822, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.25.0.weight": { |
| "min": -0.7786033749580383, |
| "max": 0.7243013381958008, |
| "mean": 1.8795288269757293e-05, |
| "std": 0.046159159392118454, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 2048 |
| ] |
| }, |
| "transformer.layers.25.1.g": { |
| "min": 0.3385763168334961, |
| "max": 1.4310884475708008, |
| "mean": 0.9482859969139099, |
| "std": 0.20665791630744934, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.25.2.to_q.weight": { |
| "min": -1.745840311050415, |
| "max": 1.7046537399291992, |
| "mean": 0.00022703518334310502, |
| "std": 0.15869012475013733, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.25.2.to_q.bias": { |
| "min": -1.2008079290390015, |
| "max": 1.1013628244400024, |
| "mean": -0.009554527699947357, |
| "std": 0.20401149988174438, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.25.2.to_k.weight": { |
| "min": -0.4215790033340454, |
| "max": 0.427647203207016, |
| "mean": 6.439993012463674e-05, |
| "std": 0.048017047345638275, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.25.2.to_k.bias": { |
| "min": -19.76506996154785, |
| "max": 19.559972763061523, |
| "mean": -0.24841785430908203, |
| "std": 4.7801384925842285, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.25.2.to_v.weight": { |
| "min": -0.32463034987449646, |
| "max": 0.4392913281917572, |
| "mean": -1.1934026588278357e-05, |
| "std": 0.046162351965904236, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.25.2.to_v.bias": { |
| "min": -0.03394031897187233, |
| "max": 0.03703805059194565, |
| "mean": 0.0006406006286852062, |
| "std": 0.012916130013763905, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.25.2.to_out.0.weight": { |
| "min": -0.7043119668960571, |
| "max": 0.6668245792388916, |
| "mean": 4.3251380702713504e-05, |
| "std": 0.05788382515311241, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.25.2.to_out.0.bias": { |
| "min": -0.07238046824932098, |
| "max": 0.06770296394824982, |
| "mean": -0.00013378039875533432, |
| "std": 0.012917297892272472, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.25.3.g": { |
| "min": 0.38019153475761414, |
| "max": 1.391236424446106, |
| "mean": 1.0665456056594849, |
| "std": 0.21965359151363373, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.25.4.ff.0.0.weight": { |
| "min": -0.6170499324798584, |
| "max": 0.718601405620575, |
| "mean": 0.00011217871360713616, |
| "std": 0.058021701872348785, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.25.4.ff.0.0.bias": { |
| "min": -0.21975933015346527, |
| "max": 0.22518815100193024, |
| "mean": 0.006216429639607668, |
| "std": 0.049728311598300934, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.25.4.ff.2.weight": { |
| "min": -0.6300503015518188, |
| "max": 0.8897712826728821, |
| "mean": 1.1653193723759614e-05, |
| "std": 0.023531364277005196, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.25.4.ff.2.bias": { |
| "min": -0.5075116753578186, |
| "max": 0.47451627254486084, |
| "mean": -0.0030209918040782213, |
| "std": 0.06935632228851318, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.norm_out.g": { |
| "min": 0.5379416942596436, |
| "max": 1.1812505722045898, |
| "mean": 0.7826943397521973, |
| "std": 0.0987553521990776, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.proj_out.weight": { |
| "min": -0.26785895228385925, |
| "max": 0.21342454850673676, |
| "mean": -0.0002236703730886802, |
| "std": 0.05399824678897858, |
| "sparsity": 0.0, |
| "shape": [ |
| 100, |
| 1024 |
| ] |
| }, |
| "transformer.proj_out.bias": { |
| "min": -0.23829060792922974, |
| "max": 0.014859253540635109, |
| "mean": -0.043948449194431305, |
| "std": 0.034328024834394455, |
| "sparsity": 0.0, |
| "shape": [ |
| 100 |
| ] |
| } |
| } |
| } |