| { |
| "layer_types": { |
| "transformer": 391 |
| }, |
| "parameter_counts": { |
| "transformer.time_embed.time_mlp.0.weight": 262144, |
| "transformer.time_embed.time_mlp.0.bias": 1024, |
| "transformer.time_embed.time_mlp.2.weight": 1048576, |
| "transformer.time_embed.time_mlp.2.bias": 1024, |
| "transformer.text_embed.text_embed.weight": 254600, |
| "transformer.input_embed.proj.weight": 307200, |
| "transformer.input_embed.proj.bias": 1024, |
| "transformer.input_embed.conv_pos_embed.conv1d.0.weight": 2031616, |
| "transformer.input_embed.conv_pos_embed.conv1d.0.bias": 1024, |
| "transformer.input_embed.conv_pos_embed.conv1d.2.weight": 2031616, |
| "transformer.input_embed.conv_pos_embed.conv1d.2.bias": 1024, |
| "transformer.layers.0.1.g": 1024, |
| "transformer.layers.0.2.to_q.weight": 1048576, |
| "transformer.layers.0.2.to_q.bias": 1024, |
| "transformer.layers.0.2.to_k.weight": 1048576, |
| "transformer.layers.0.2.to_k.bias": 1024, |
| "transformer.layers.0.2.to_v.weight": 1048576, |
| "transformer.layers.0.2.to_v.bias": 1024, |
| "transformer.layers.0.2.to_out.0.weight": 1048576, |
| "transformer.layers.0.2.to_out.0.bias": 1024, |
| "transformer.layers.0.3.g": 1024, |
| "transformer.layers.0.4.ff.0.0.weight": 4194304, |
| "transformer.layers.0.4.ff.0.0.bias": 4096, |
| "transformer.layers.0.4.ff.2.weight": 4194304, |
| "transformer.layers.0.4.ff.2.bias": 1024, |
| "transformer.layers.1.1.g": 1024, |
| "transformer.layers.1.2.to_q.weight": 1048576, |
| "transformer.layers.1.2.to_q.bias": 1024, |
| "transformer.layers.1.2.to_k.weight": 1048576, |
| "transformer.layers.1.2.to_k.bias": 1024, |
| "transformer.layers.1.2.to_v.weight": 1048576, |
| "transformer.layers.1.2.to_v.bias": 1024, |
| "transformer.layers.1.2.to_out.0.weight": 1048576, |
| "transformer.layers.1.2.to_out.0.bias": 1024, |
| "transformer.layers.1.3.g": 1024, |
| "transformer.layers.1.4.ff.0.0.weight": 4194304, |
| "transformer.layers.1.4.ff.0.0.bias": 4096, |
| "transformer.layers.1.4.ff.2.weight": 4194304, |
| "transformer.layers.1.4.ff.2.bias": 1024, |
| "transformer.layers.2.1.g": 1024, |
| "transformer.layers.2.2.to_q.weight": 1048576, |
| "transformer.layers.2.2.to_q.bias": 1024, |
| "transformer.layers.2.2.to_k.weight": 1048576, |
| "transformer.layers.2.2.to_k.bias": 1024, |
| "transformer.layers.2.2.to_v.weight": 1048576, |
| "transformer.layers.2.2.to_v.bias": 1024, |
| "transformer.layers.2.2.to_out.0.weight": 1048576, |
| "transformer.layers.2.2.to_out.0.bias": 1024, |
| "transformer.layers.2.3.g": 1024, |
| "transformer.layers.2.4.ff.0.0.weight": 4194304, |
| "transformer.layers.2.4.ff.0.0.bias": 4096, |
| "transformer.layers.2.4.ff.2.weight": 4194304, |
| "transformer.layers.2.4.ff.2.bias": 1024, |
| "transformer.layers.3.1.g": 1024, |
| "transformer.layers.3.2.to_q.weight": 1048576, |
| "transformer.layers.3.2.to_q.bias": 1024, |
| "transformer.layers.3.2.to_k.weight": 1048576, |
| "transformer.layers.3.2.to_k.bias": 1024, |
| "transformer.layers.3.2.to_v.weight": 1048576, |
| "transformer.layers.3.2.to_v.bias": 1024, |
| "transformer.layers.3.2.to_out.0.weight": 1048576, |
| "transformer.layers.3.2.to_out.0.bias": 1024, |
| "transformer.layers.3.3.g": 1024, |
| "transformer.layers.3.4.ff.0.0.weight": 4194304, |
| "transformer.layers.3.4.ff.0.0.bias": 4096, |
| "transformer.layers.3.4.ff.2.weight": 4194304, |
| "transformer.layers.3.4.ff.2.bias": 1024, |
| "transformer.layers.4.1.g": 1024, |
| "transformer.layers.4.2.to_q.weight": 1048576, |
| "transformer.layers.4.2.to_q.bias": 1024, |
| "transformer.layers.4.2.to_k.weight": 1048576, |
| "transformer.layers.4.2.to_k.bias": 1024, |
| "transformer.layers.4.2.to_v.weight": 1048576, |
| "transformer.layers.4.2.to_v.bias": 1024, |
| "transformer.layers.4.2.to_out.0.weight": 1048576, |
| "transformer.layers.4.2.to_out.0.bias": 1024, |
| "transformer.layers.4.3.g": 1024, |
| "transformer.layers.4.4.ff.0.0.weight": 4194304, |
| "transformer.layers.4.4.ff.0.0.bias": 4096, |
| "transformer.layers.4.4.ff.2.weight": 4194304, |
| "transformer.layers.4.4.ff.2.bias": 1024, |
| "transformer.layers.5.1.g": 1024, |
| "transformer.layers.5.2.to_q.weight": 1048576, |
| "transformer.layers.5.2.to_q.bias": 1024, |
| "transformer.layers.5.2.to_k.weight": 1048576, |
| "transformer.layers.5.2.to_k.bias": 1024, |
| "transformer.layers.5.2.to_v.weight": 1048576, |
| "transformer.layers.5.2.to_v.bias": 1024, |
| "transformer.layers.5.2.to_out.0.weight": 1048576, |
| "transformer.layers.5.2.to_out.0.bias": 1024, |
| "transformer.layers.5.3.g": 1024, |
| "transformer.layers.5.4.ff.0.0.weight": 4194304, |
| "transformer.layers.5.4.ff.0.0.bias": 4096, |
| "transformer.layers.5.4.ff.2.weight": 4194304, |
| "transformer.layers.5.4.ff.2.bias": 1024, |
| "transformer.layers.6.1.g": 1024, |
| "transformer.layers.6.2.to_q.weight": 1048576, |
| "transformer.layers.6.2.to_q.bias": 1024, |
| "transformer.layers.6.2.to_k.weight": 1048576, |
| "transformer.layers.6.2.to_k.bias": 1024, |
| "transformer.layers.6.2.to_v.weight": 1048576, |
| "transformer.layers.6.2.to_v.bias": 1024, |
| "transformer.layers.6.2.to_out.0.weight": 1048576, |
| "transformer.layers.6.2.to_out.0.bias": 1024, |
| "transformer.layers.6.3.g": 1024, |
| "transformer.layers.6.4.ff.0.0.weight": 4194304, |
| "transformer.layers.6.4.ff.0.0.bias": 4096, |
| "transformer.layers.6.4.ff.2.weight": 4194304, |
| "transformer.layers.6.4.ff.2.bias": 1024, |
| "transformer.layers.7.1.g": 1024, |
| "transformer.layers.7.2.to_q.weight": 1048576, |
| "transformer.layers.7.2.to_q.bias": 1024, |
| "transformer.layers.7.2.to_k.weight": 1048576, |
| "transformer.layers.7.2.to_k.bias": 1024, |
| "transformer.layers.7.2.to_v.weight": 1048576, |
| "transformer.layers.7.2.to_v.bias": 1024, |
| "transformer.layers.7.2.to_out.0.weight": 1048576, |
| "transformer.layers.7.2.to_out.0.bias": 1024, |
| "transformer.layers.7.3.g": 1024, |
| "transformer.layers.7.4.ff.0.0.weight": 4194304, |
| "transformer.layers.7.4.ff.0.0.bias": 4096, |
| "transformer.layers.7.4.ff.2.weight": 4194304, |
| "transformer.layers.7.4.ff.2.bias": 1024, |
| "transformer.layers.8.1.g": 1024, |
| "transformer.layers.8.2.to_q.weight": 1048576, |
| "transformer.layers.8.2.to_q.bias": 1024, |
| "transformer.layers.8.2.to_k.weight": 1048576, |
| "transformer.layers.8.2.to_k.bias": 1024, |
| "transformer.layers.8.2.to_v.weight": 1048576, |
| "transformer.layers.8.2.to_v.bias": 1024, |
| "transformer.layers.8.2.to_out.0.weight": 1048576, |
| "transformer.layers.8.2.to_out.0.bias": 1024, |
| "transformer.layers.8.3.g": 1024, |
| "transformer.layers.8.4.ff.0.0.weight": 4194304, |
| "transformer.layers.8.4.ff.0.0.bias": 4096, |
| "transformer.layers.8.4.ff.2.weight": 4194304, |
| "transformer.layers.8.4.ff.2.bias": 1024, |
| "transformer.layers.9.1.g": 1024, |
| "transformer.layers.9.2.to_q.weight": 1048576, |
| "transformer.layers.9.2.to_q.bias": 1024, |
| "transformer.layers.9.2.to_k.weight": 1048576, |
| "transformer.layers.9.2.to_k.bias": 1024, |
| "transformer.layers.9.2.to_v.weight": 1048576, |
| "transformer.layers.9.2.to_v.bias": 1024, |
| "transformer.layers.9.2.to_out.0.weight": 1048576, |
| "transformer.layers.9.2.to_out.0.bias": 1024, |
| "transformer.layers.9.3.g": 1024, |
| "transformer.layers.9.4.ff.0.0.weight": 4194304, |
| "transformer.layers.9.4.ff.0.0.bias": 4096, |
| "transformer.layers.9.4.ff.2.weight": 4194304, |
| "transformer.layers.9.4.ff.2.bias": 1024, |
| "transformer.layers.10.1.g": 1024, |
| "transformer.layers.10.2.to_q.weight": 1048576, |
| "transformer.layers.10.2.to_q.bias": 1024, |
| "transformer.layers.10.2.to_k.weight": 1048576, |
| "transformer.layers.10.2.to_k.bias": 1024, |
| "transformer.layers.10.2.to_v.weight": 1048576, |
| "transformer.layers.10.2.to_v.bias": 1024, |
| "transformer.layers.10.2.to_out.0.weight": 1048576, |
| "transformer.layers.10.2.to_out.0.bias": 1024, |
| "transformer.layers.10.3.g": 1024, |
| "transformer.layers.10.4.ff.0.0.weight": 4194304, |
| "transformer.layers.10.4.ff.0.0.bias": 4096, |
| "transformer.layers.10.4.ff.2.weight": 4194304, |
| "transformer.layers.10.4.ff.2.bias": 1024, |
| "transformer.layers.11.1.g": 1024, |
| "transformer.layers.11.2.to_q.weight": 1048576, |
| "transformer.layers.11.2.to_q.bias": 1024, |
| "transformer.layers.11.2.to_k.weight": 1048576, |
| "transformer.layers.11.2.to_k.bias": 1024, |
| "transformer.layers.11.2.to_v.weight": 1048576, |
| "transformer.layers.11.2.to_v.bias": 1024, |
| "transformer.layers.11.2.to_out.0.weight": 1048576, |
| "transformer.layers.11.2.to_out.0.bias": 1024, |
| "transformer.layers.11.3.g": 1024, |
| "transformer.layers.11.4.ff.0.0.weight": 4194304, |
| "transformer.layers.11.4.ff.0.0.bias": 4096, |
| "transformer.layers.11.4.ff.2.weight": 4194304, |
| "transformer.layers.11.4.ff.2.bias": 1024, |
| "transformer.layers.12.1.g": 1024, |
| "transformer.layers.12.2.to_q.weight": 1048576, |
| "transformer.layers.12.2.to_q.bias": 1024, |
| "transformer.layers.12.2.to_k.weight": 1048576, |
| "transformer.layers.12.2.to_k.bias": 1024, |
| "transformer.layers.12.2.to_v.weight": 1048576, |
| "transformer.layers.12.2.to_v.bias": 1024, |
| "transformer.layers.12.2.to_out.0.weight": 1048576, |
| "transformer.layers.12.2.to_out.0.bias": 1024, |
| "transformer.layers.12.3.g": 1024, |
| "transformer.layers.12.4.ff.0.0.weight": 4194304, |
| "transformer.layers.12.4.ff.0.0.bias": 4096, |
| "transformer.layers.12.4.ff.2.weight": 4194304, |
| "transformer.layers.12.4.ff.2.bias": 1024, |
| "transformer.layers.13.0.weight": 2097152, |
| "transformer.layers.13.1.g": 1024, |
| "transformer.layers.13.2.to_q.weight": 1048576, |
| "transformer.layers.13.2.to_q.bias": 1024, |
| "transformer.layers.13.2.to_k.weight": 1048576, |
| "transformer.layers.13.2.to_k.bias": 1024, |
| "transformer.layers.13.2.to_v.weight": 1048576, |
| "transformer.layers.13.2.to_v.bias": 1024, |
| "transformer.layers.13.2.to_out.0.weight": 1048576, |
| "transformer.layers.13.2.to_out.0.bias": 1024, |
| "transformer.layers.13.3.g": 1024, |
| "transformer.layers.13.4.ff.0.0.weight": 4194304, |
| "transformer.layers.13.4.ff.0.0.bias": 4096, |
| "transformer.layers.13.4.ff.2.weight": 4194304, |
| "transformer.layers.13.4.ff.2.bias": 1024, |
| "transformer.layers.14.0.weight": 2097152, |
| "transformer.layers.14.1.g": 1024, |
| "transformer.layers.14.2.to_q.weight": 1048576, |
| "transformer.layers.14.2.to_q.bias": 1024, |
| "transformer.layers.14.2.to_k.weight": 1048576, |
| "transformer.layers.14.2.to_k.bias": 1024, |
| "transformer.layers.14.2.to_v.weight": 1048576, |
| "transformer.layers.14.2.to_v.bias": 1024, |
| "transformer.layers.14.2.to_out.0.weight": 1048576, |
| "transformer.layers.14.2.to_out.0.bias": 1024, |
| "transformer.layers.14.3.g": 1024, |
| "transformer.layers.14.4.ff.0.0.weight": 4194304, |
| "transformer.layers.14.4.ff.0.0.bias": 4096, |
| "transformer.layers.14.4.ff.2.weight": 4194304, |
| "transformer.layers.14.4.ff.2.bias": 1024, |
| "transformer.layers.15.0.weight": 2097152, |
| "transformer.layers.15.1.g": 1024, |
| "transformer.layers.15.2.to_q.weight": 1048576, |
| "transformer.layers.15.2.to_q.bias": 1024, |
| "transformer.layers.15.2.to_k.weight": 1048576, |
| "transformer.layers.15.2.to_k.bias": 1024, |
| "transformer.layers.15.2.to_v.weight": 1048576, |
| "transformer.layers.15.2.to_v.bias": 1024, |
| "transformer.layers.15.2.to_out.0.weight": 1048576, |
| "transformer.layers.15.2.to_out.0.bias": 1024, |
| "transformer.layers.15.3.g": 1024, |
| "transformer.layers.15.4.ff.0.0.weight": 4194304, |
| "transformer.layers.15.4.ff.0.0.bias": 4096, |
| "transformer.layers.15.4.ff.2.weight": 4194304, |
| "transformer.layers.15.4.ff.2.bias": 1024, |
| "transformer.layers.16.0.weight": 2097152, |
| "transformer.layers.16.1.g": 1024, |
| "transformer.layers.16.2.to_q.weight": 1048576, |
| "transformer.layers.16.2.to_q.bias": 1024, |
| "transformer.layers.16.2.to_k.weight": 1048576, |
| "transformer.layers.16.2.to_k.bias": 1024, |
| "transformer.layers.16.2.to_v.weight": 1048576, |
| "transformer.layers.16.2.to_v.bias": 1024, |
| "transformer.layers.16.2.to_out.0.weight": 1048576, |
| "transformer.layers.16.2.to_out.0.bias": 1024, |
| "transformer.layers.16.3.g": 1024, |
| "transformer.layers.16.4.ff.0.0.weight": 4194304, |
| "transformer.layers.16.4.ff.0.0.bias": 4096, |
| "transformer.layers.16.4.ff.2.weight": 4194304, |
| "transformer.layers.16.4.ff.2.bias": 1024, |
| "transformer.layers.17.0.weight": 2097152, |
| "transformer.layers.17.1.g": 1024, |
| "transformer.layers.17.2.to_q.weight": 1048576, |
| "transformer.layers.17.2.to_q.bias": 1024, |
| "transformer.layers.17.2.to_k.weight": 1048576, |
| "transformer.layers.17.2.to_k.bias": 1024, |
| "transformer.layers.17.2.to_v.weight": 1048576, |
| "transformer.layers.17.2.to_v.bias": 1024, |
| "transformer.layers.17.2.to_out.0.weight": 1048576, |
| "transformer.layers.17.2.to_out.0.bias": 1024, |
| "transformer.layers.17.3.g": 1024, |
| "transformer.layers.17.4.ff.0.0.weight": 4194304, |
| "transformer.layers.17.4.ff.0.0.bias": 4096, |
| "transformer.layers.17.4.ff.2.weight": 4194304, |
| "transformer.layers.17.4.ff.2.bias": 1024, |
| "transformer.layers.18.0.weight": 2097152, |
| "transformer.layers.18.1.g": 1024, |
| "transformer.layers.18.2.to_q.weight": 1048576, |
| "transformer.layers.18.2.to_q.bias": 1024, |
| "transformer.layers.18.2.to_k.weight": 1048576, |
| "transformer.layers.18.2.to_k.bias": 1024, |
| "transformer.layers.18.2.to_v.weight": 1048576, |
| "transformer.layers.18.2.to_v.bias": 1024, |
| "transformer.layers.18.2.to_out.0.weight": 1048576, |
| "transformer.layers.18.2.to_out.0.bias": 1024, |
| "transformer.layers.18.3.g": 1024, |
| "transformer.layers.18.4.ff.0.0.weight": 4194304, |
| "transformer.layers.18.4.ff.0.0.bias": 4096, |
| "transformer.layers.18.4.ff.2.weight": 4194304, |
| "transformer.layers.18.4.ff.2.bias": 1024, |
| "transformer.layers.19.0.weight": 2097152, |
| "transformer.layers.19.1.g": 1024, |
| "transformer.layers.19.2.to_q.weight": 1048576, |
| "transformer.layers.19.2.to_q.bias": 1024, |
| "transformer.layers.19.2.to_k.weight": 1048576, |
| "transformer.layers.19.2.to_k.bias": 1024, |
| "transformer.layers.19.2.to_v.weight": 1048576, |
| "transformer.layers.19.2.to_v.bias": 1024, |
| "transformer.layers.19.2.to_out.0.weight": 1048576, |
| "transformer.layers.19.2.to_out.0.bias": 1024, |
| "transformer.layers.19.3.g": 1024, |
| "transformer.layers.19.4.ff.0.0.weight": 4194304, |
| "transformer.layers.19.4.ff.0.0.bias": 4096, |
| "transformer.layers.19.4.ff.2.weight": 4194304, |
| "transformer.layers.19.4.ff.2.bias": 1024, |
| "transformer.layers.20.0.weight": 2097152, |
| "transformer.layers.20.1.g": 1024, |
| "transformer.layers.20.2.to_q.weight": 1048576, |
| "transformer.layers.20.2.to_q.bias": 1024, |
| "transformer.layers.20.2.to_k.weight": 1048576, |
| "transformer.layers.20.2.to_k.bias": 1024, |
| "transformer.layers.20.2.to_v.weight": 1048576, |
| "transformer.layers.20.2.to_v.bias": 1024, |
| "transformer.layers.20.2.to_out.0.weight": 1048576, |
| "transformer.layers.20.2.to_out.0.bias": 1024, |
| "transformer.layers.20.3.g": 1024, |
| "transformer.layers.20.4.ff.0.0.weight": 4194304, |
| "transformer.layers.20.4.ff.0.0.bias": 4096, |
| "transformer.layers.20.4.ff.2.weight": 4194304, |
| "transformer.layers.20.4.ff.2.bias": 1024, |
| "transformer.layers.21.0.weight": 2097152, |
| "transformer.layers.21.1.g": 1024, |
| "transformer.layers.21.2.to_q.weight": 1048576, |
| "transformer.layers.21.2.to_q.bias": 1024, |
| "transformer.layers.21.2.to_k.weight": 1048576, |
| "transformer.layers.21.2.to_k.bias": 1024, |
| "transformer.layers.21.2.to_v.weight": 1048576, |
| "transformer.layers.21.2.to_v.bias": 1024, |
| "transformer.layers.21.2.to_out.0.weight": 1048576, |
| "transformer.layers.21.2.to_out.0.bias": 1024, |
| "transformer.layers.21.3.g": 1024, |
| "transformer.layers.21.4.ff.0.0.weight": 4194304, |
| "transformer.layers.21.4.ff.0.0.bias": 4096, |
| "transformer.layers.21.4.ff.2.weight": 4194304, |
| "transformer.layers.21.4.ff.2.bias": 1024, |
| "transformer.layers.22.0.weight": 2097152, |
| "transformer.layers.22.1.g": 1024, |
| "transformer.layers.22.2.to_q.weight": 1048576, |
| "transformer.layers.22.2.to_q.bias": 1024, |
| "transformer.layers.22.2.to_k.weight": 1048576, |
| "transformer.layers.22.2.to_k.bias": 1024, |
| "transformer.layers.22.2.to_v.weight": 1048576, |
| "transformer.layers.22.2.to_v.bias": 1024, |
| "transformer.layers.22.2.to_out.0.weight": 1048576, |
| "transformer.layers.22.2.to_out.0.bias": 1024, |
| "transformer.layers.22.3.g": 1024, |
| "transformer.layers.22.4.ff.0.0.weight": 4194304, |
| "transformer.layers.22.4.ff.0.0.bias": 4096, |
| "transformer.layers.22.4.ff.2.weight": 4194304, |
| "transformer.layers.22.4.ff.2.bias": 1024, |
| "transformer.layers.23.0.weight": 2097152, |
| "transformer.layers.23.1.g": 1024, |
| "transformer.layers.23.2.to_q.weight": 1048576, |
| "transformer.layers.23.2.to_q.bias": 1024, |
| "transformer.layers.23.2.to_k.weight": 1048576, |
| "transformer.layers.23.2.to_k.bias": 1024, |
| "transformer.layers.23.2.to_v.weight": 1048576, |
| "transformer.layers.23.2.to_v.bias": 1024, |
| "transformer.layers.23.2.to_out.0.weight": 1048576, |
| "transformer.layers.23.2.to_out.0.bias": 1024, |
| "transformer.layers.23.3.g": 1024, |
| "transformer.layers.23.4.ff.0.0.weight": 4194304, |
| "transformer.layers.23.4.ff.0.0.bias": 4096, |
| "transformer.layers.23.4.ff.2.weight": 4194304, |
| "transformer.layers.23.4.ff.2.bias": 1024, |
| "transformer.layers.24.0.weight": 2097152, |
| "transformer.layers.24.1.g": 1024, |
| "transformer.layers.24.2.to_q.weight": 1048576, |
| "transformer.layers.24.2.to_q.bias": 1024, |
| "transformer.layers.24.2.to_k.weight": 1048576, |
| "transformer.layers.24.2.to_k.bias": 1024, |
| "transformer.layers.24.2.to_v.weight": 1048576, |
| "transformer.layers.24.2.to_v.bias": 1024, |
| "transformer.layers.24.2.to_out.0.weight": 1048576, |
| "transformer.layers.24.2.to_out.0.bias": 1024, |
| "transformer.layers.24.3.g": 1024, |
| "transformer.layers.24.4.ff.0.0.weight": 4194304, |
| "transformer.layers.24.4.ff.0.0.bias": 4096, |
| "transformer.layers.24.4.ff.2.weight": 4194304, |
| "transformer.layers.24.4.ff.2.bias": 1024, |
| "transformer.layers.25.0.weight": 2097152, |
| "transformer.layers.25.1.g": 1024, |
| "transformer.layers.25.2.to_q.weight": 1048576, |
| "transformer.layers.25.2.to_q.bias": 1024, |
| "transformer.layers.25.2.to_k.weight": 1048576, |
| "transformer.layers.25.2.to_k.bias": 1024, |
| "transformer.layers.25.2.to_v.weight": 1048576, |
| "transformer.layers.25.2.to_v.bias": 1024, |
| "transformer.layers.25.2.to_out.0.weight": 1048576, |
| "transformer.layers.25.2.to_out.0.bias": 1024, |
| "transformer.layers.25.3.g": 1024, |
| "transformer.layers.25.4.ff.0.0.weight": 4194304, |
| "transformer.layers.25.4.ff.0.0.bias": 4096, |
| "transformer.layers.25.4.ff.2.weight": 4194304, |
| "transformer.layers.25.4.ff.2.bias": 1024, |
| "transformer.norm_out.g": 1024, |
| "transformer.proj_out.weight": 102400, |
| "transformer.proj_out.bias": 100 |
| }, |
| "important_layers": [ |
| "transformer.time_embed.time_mlp.0.weight", |
| "transformer.time_embed.time_mlp.2.weight", |
| "transformer.text_embed.text_embed.weight", |
| "transformer.input_embed.proj.weight", |
| "transformer.input_embed.conv_pos_embed.conv1d.0.weight", |
| "transformer.input_embed.conv_pos_embed.conv1d.2.weight", |
| "transformer.layers.0.2.to_q.weight", |
| "transformer.layers.0.2.to_k.weight", |
| "transformer.layers.0.2.to_v.weight", |
| "transformer.layers.0.2.to_out.0.weight", |
| "transformer.layers.0.4.ff.0.0.weight", |
| "transformer.layers.0.4.ff.2.weight", |
| "transformer.layers.1.2.to_q.weight", |
| "transformer.layers.1.2.to_k.weight", |
| "transformer.layers.1.2.to_v.weight", |
| "transformer.layers.1.2.to_out.0.weight", |
| "transformer.layers.1.4.ff.0.0.weight", |
| "transformer.layers.1.4.ff.2.weight", |
| "transformer.layers.2.2.to_q.weight", |
| "transformer.layers.2.2.to_k.weight", |
| "transformer.layers.2.2.to_v.weight", |
| "transformer.layers.2.2.to_out.0.weight", |
| "transformer.layers.2.4.ff.0.0.weight", |
| "transformer.layers.2.4.ff.2.weight", |
| "transformer.layers.3.2.to_q.weight", |
| "transformer.layers.3.2.to_k.weight", |
| "transformer.layers.3.2.to_v.weight", |
| "transformer.layers.3.2.to_out.0.weight", |
| "transformer.layers.3.4.ff.0.0.weight", |
| "transformer.layers.3.4.ff.2.weight", |
| "transformer.layers.4.2.to_q.weight", |
| "transformer.layers.4.2.to_k.weight", |
| "transformer.layers.4.2.to_v.weight", |
| "transformer.layers.4.2.to_out.0.weight", |
| "transformer.layers.4.4.ff.0.0.weight", |
| "transformer.layers.4.4.ff.2.weight", |
| "transformer.layers.5.2.to_q.weight", |
| "transformer.layers.5.2.to_k.weight", |
| "transformer.layers.5.2.to_v.weight", |
| "transformer.layers.5.2.to_out.0.weight", |
| "transformer.layers.5.4.ff.0.0.weight", |
| "transformer.layers.5.4.ff.2.weight", |
| "transformer.layers.6.2.to_q.weight", |
| "transformer.layers.6.2.to_k.weight", |
| "transformer.layers.6.2.to_v.weight", |
| "transformer.layers.6.2.to_out.0.weight", |
| "transformer.layers.6.4.ff.0.0.weight", |
| "transformer.layers.6.4.ff.2.weight", |
| "transformer.layers.7.2.to_q.weight", |
| "transformer.layers.7.2.to_k.weight", |
| "transformer.layers.7.2.to_v.weight", |
| "transformer.layers.7.2.to_out.0.weight", |
| "transformer.layers.7.4.ff.0.0.weight", |
| "transformer.layers.7.4.ff.2.weight", |
| "transformer.layers.8.4.ff.0.0.weight", |
| "transformer.layers.8.4.ff.2.weight", |
| "transformer.layers.9.4.ff.0.0.weight", |
| "transformer.layers.9.4.ff.2.weight", |
| "transformer.layers.10.4.ff.0.0.weight", |
| "transformer.layers.10.4.ff.2.weight", |
| "transformer.layers.11.4.ff.0.0.weight", |
| "transformer.layers.11.4.ff.2.weight", |
| "transformer.layers.12.4.ff.0.0.weight", |
| "transformer.layers.12.4.ff.2.weight", |
| "transformer.layers.13.0.weight", |
| "transformer.layers.13.4.ff.0.0.weight", |
| "transformer.layers.13.4.ff.2.weight", |
| "transformer.layers.14.0.weight", |
| "transformer.layers.14.4.ff.0.0.weight", |
| "transformer.layers.14.4.ff.2.weight", |
| "transformer.layers.15.0.weight", |
| "transformer.layers.15.4.ff.0.0.weight", |
| "transformer.layers.15.4.ff.2.weight", |
| "transformer.layers.16.4.ff.0.0.weight", |
| "transformer.layers.16.4.ff.2.weight", |
| "transformer.layers.17.4.ff.0.0.weight", |
| "transformer.layers.17.4.ff.2.weight", |
| "transformer.layers.18.4.ff.0.0.weight", |
| "transformer.layers.18.4.ff.2.weight", |
| "transformer.layers.19.4.ff.0.0.weight", |
| "transformer.layers.19.4.ff.2.weight", |
| "transformer.layers.20.4.ff.0.0.weight", |
| "transformer.layers.20.4.ff.2.weight", |
| "transformer.layers.21.4.ff.0.0.weight", |
| "transformer.layers.21.4.ff.2.weight", |
| "transformer.layers.22.4.ff.0.0.weight", |
| "transformer.layers.22.4.ff.2.weight", |
| "transformer.layers.23.4.ff.0.0.weight", |
| "transformer.layers.23.4.ff.2.weight", |
| "transformer.layers.24.4.ff.0.0.weight", |
| "transformer.layers.24.4.ff.2.weight", |
| "transformer.layers.25.4.ff.0.0.weight", |
| "transformer.layers.25.4.ff.2.weight" |
| ], |
| "bottleneck_layers": [], |
| "recommendations": { |
| "focus_layers": [ |
| "transformer.time_embed.time_mlp.0.weight", |
| "transformer.time_embed.time_mlp.2.weight", |
| "transformer.text_embed.text_embed.weight", |
| "transformer.input_embed.proj.weight", |
| "transformer.input_embed.conv_pos_embed.conv1d.0.weight", |
| "transformer.input_embed.conv_pos_embed.conv1d.2.weight", |
| "transformer.layers.0.2.to_q.weight", |
| "transformer.layers.0.2.to_k.weight", |
| "transformer.layers.0.2.to_v.weight", |
| "transformer.layers.0.2.to_out.0.weight", |
| "transformer.layers.0.4.ff.0.0.weight", |
| "transformer.layers.0.4.ff.2.weight", |
| "transformer.layers.1.2.to_q.weight", |
| "transformer.layers.1.2.to_k.weight", |
| "transformer.layers.1.2.to_v.weight", |
| "transformer.layers.1.2.to_out.0.weight", |
| "transformer.layers.1.4.ff.0.0.weight", |
| "transformer.layers.1.4.ff.2.weight", |
| "transformer.layers.2.2.to_q.weight", |
| "transformer.layers.2.2.to_k.weight", |
| "transformer.layers.2.2.to_v.weight", |
| "transformer.layers.2.2.to_out.0.weight", |
| "transformer.layers.2.4.ff.0.0.weight", |
| "transformer.layers.2.4.ff.2.weight", |
| "transformer.layers.3.2.to_q.weight", |
| "transformer.layers.3.2.to_k.weight", |
| "transformer.layers.3.2.to_v.weight", |
| "transformer.layers.3.2.to_out.0.weight", |
| "transformer.layers.3.4.ff.0.0.weight", |
| "transformer.layers.3.4.ff.2.weight", |
| "transformer.layers.4.2.to_q.weight", |
| "transformer.layers.4.2.to_k.weight", |
| "transformer.layers.4.2.to_v.weight", |
| "transformer.layers.4.2.to_out.0.weight", |
| "transformer.layers.4.4.ff.0.0.weight", |
| "transformer.layers.4.4.ff.2.weight", |
| "transformer.layers.5.2.to_q.weight", |
| "transformer.layers.5.2.to_k.weight", |
| "transformer.layers.5.2.to_v.weight", |
| "transformer.layers.5.2.to_out.0.weight", |
| "transformer.layers.5.4.ff.0.0.weight", |
| "transformer.layers.5.4.ff.2.weight", |
| "transformer.layers.6.2.to_q.weight", |
| "transformer.layers.6.2.to_k.weight", |
| "transformer.layers.6.2.to_v.weight", |
| "transformer.layers.6.2.to_out.0.weight", |
| "transformer.layers.6.4.ff.0.0.weight", |
| "transformer.layers.6.4.ff.2.weight", |
| "transformer.layers.7.2.to_q.weight", |
| "transformer.layers.7.2.to_k.weight", |
| "transformer.layers.7.2.to_v.weight", |
| "transformer.layers.7.2.to_out.0.weight", |
| "transformer.layers.7.4.ff.0.0.weight", |
| "transformer.layers.7.4.ff.2.weight", |
| "transformer.layers.8.4.ff.0.0.weight", |
| "transformer.layers.8.4.ff.2.weight", |
| "transformer.layers.9.4.ff.0.0.weight", |
| "transformer.layers.9.4.ff.2.weight", |
| "transformer.layers.10.4.ff.0.0.weight", |
| "transformer.layers.10.4.ff.2.weight", |
| "transformer.layers.11.4.ff.0.0.weight", |
| "transformer.layers.11.4.ff.2.weight", |
| "transformer.layers.12.4.ff.0.0.weight", |
| "transformer.layers.12.4.ff.2.weight", |
| "transformer.layers.13.0.weight", |
| "transformer.layers.13.4.ff.0.0.weight", |
| "transformer.layers.13.4.ff.2.weight", |
| "transformer.layers.14.0.weight", |
| "transformer.layers.14.4.ff.0.0.weight", |
| "transformer.layers.14.4.ff.2.weight", |
| "transformer.layers.15.0.weight", |
| "transformer.layers.15.4.ff.0.0.weight", |
| "transformer.layers.15.4.ff.2.weight", |
| "transformer.layers.16.4.ff.0.0.weight", |
| "transformer.layers.16.4.ff.2.weight", |
| "transformer.layers.17.4.ff.0.0.weight", |
| "transformer.layers.17.4.ff.2.weight", |
| "transformer.layers.18.4.ff.0.0.weight", |
| "transformer.layers.18.4.ff.2.weight", |
| "transformer.layers.19.4.ff.0.0.weight", |
| "transformer.layers.19.4.ff.2.weight", |
| "transformer.layers.20.4.ff.0.0.weight", |
| "transformer.layers.20.4.ff.2.weight", |
| "transformer.layers.21.4.ff.0.0.weight", |
| "transformer.layers.21.4.ff.2.weight", |
| "transformer.layers.22.4.ff.0.0.weight", |
| "transformer.layers.22.4.ff.2.weight", |
| "transformer.layers.23.4.ff.0.0.weight", |
| "transformer.layers.23.4.ff.2.weight", |
| "transformer.layers.24.4.ff.0.0.weight", |
| "transformer.layers.24.4.ff.2.weight", |
| "transformer.layers.25.4.ff.0.0.weight", |
| "transformer.layers.25.4.ff.2.weight" |
| ] |
| }, |
| "total_parameters": 391, |
| "total_elements": 360755948, |
| "param_ranges": { |
| "transformer.time_embed.time_mlp.0.weight": { |
| "min": -0.4302421808242798, |
| "max": 0.29811733961105347, |
| "mean": -0.0025433888658881187, |
| "std": 0.04256260767579079, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 256 |
| ] |
| }, |
| "transformer.time_embed.time_mlp.0.bias": { |
| "min": -0.06305147707462311, |
| "max": 0.10753221064805984, |
| "mean": 0.0006371351191774011, |
| "std": 0.03406313806772232, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.time_embed.time_mlp.2.weight": { |
| "min": -0.4127056300640106, |
| "max": 0.8369134068489075, |
| "mean": -0.00020153506193310022, |
| "std": 0.024111680686473846, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.time_embed.time_mlp.2.bias": { |
| "min": -0.11529576778411865, |
| "max": 0.32162028551101685, |
| "mean": -0.0009410998900420964, |
| "std": 0.019562100991606712, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.text_embed.text_embed.weight": { |
| "min": -2.7922351360321045, |
| "max": 2.8709537982940674, |
| "mean": -0.0003647250996436924, |
| "std": 0.6154845356941223, |
| "sparsity": 0.0, |
| "shape": [ |
| 2546, |
| 100 |
| ] |
| }, |
| "transformer.input_embed.proj.weight": { |
| "min": -0.2792224586009979, |
| "max": 0.3816443681716919, |
| "mean": 0.0004239956906531006, |
| "std": 0.04274846613407135, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 300 |
| ] |
| }, |
| "transformer.input_embed.proj.bias": { |
| "min": -0.222523033618927, |
| "max": 0.20966869592666626, |
| "mean": -0.004486067220568657, |
| "std": 0.040918223559856415, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.input_embed.conv_pos_embed.conv1d.0.weight": { |
| "min": -0.42831921577453613, |
| "max": 0.4761074483394623, |
| "mean": 3.883292265527416e-06, |
| "std": 0.02451084926724434, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 64, |
| 31 |
| ] |
| }, |
| "transformer.input_embed.conv_pos_embed.conv1d.0.bias": { |
| "min": -0.32521355152130127, |
| "max": 0.15685473382472992, |
| "mean": -0.04670340567827225, |
| "std": 0.05158989131450653, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.input_embed.conv_pos_embed.conv1d.2.weight": { |
| "min": -0.41050970554351807, |
| "max": 0.3547350764274597, |
| "mean": -0.0001308345381403342, |
| "std": 0.023604650050401688, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 64, |
| 31 |
| ] |
| }, |
| "transformer.input_embed.conv_pos_embed.conv1d.2.bias": { |
| "min": -0.22980044782161713, |
| "max": 0.26265424489974976, |
| "mean": -0.02913527563214302, |
| "std": 0.04935712739825249, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.0.1.g": { |
| "min": 0.25461670756340027, |
| "max": 0.8201668858528137, |
| "mean": 0.5254921317100525, |
| "std": 0.08082755655050278, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.0.2.to_q.weight": { |
| "min": -0.29707157611846924, |
| "max": 0.26584771275520325, |
| "mean": -0.0004257621185388416, |
| "std": 0.032102566212415695, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.0.2.to_q.bias": { |
| "min": -0.09281580150127411, |
| "max": 0.12489211559295654, |
| "mean": 0.0006475000409409404, |
| "std": 0.025739654898643494, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.0.2.to_k.weight": { |
| "min": -0.290749192237854, |
| "max": 0.2813739478588104, |
| "mean": -7.507578993681818e-05, |
| "std": 0.030931759625673294, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.0.2.to_k.bias": { |
| "min": -5.900395393371582, |
| "max": 5.815171718597412, |
| "mean": -0.009333068504929543, |
| "std": 1.295695185661316, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.0.2.to_v.weight": { |
| "min": -0.4251696765422821, |
| "max": 0.3438807427883148, |
| "mean": 9.805745503399521e-05, |
| "std": 0.029953517019748688, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.0.2.to_v.bias": { |
| "min": -0.029049167409539223, |
| "max": 0.027643660083413124, |
| "mean": -0.00032356681185774505, |
| "std": 0.012573834508657455, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.0.2.to_out.0.weight": { |
| "min": -0.4541175961494446, |
| "max": 0.4482012987136841, |
| "mean": 2.389368455624208e-05, |
| "std": 0.023853901773691177, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.0.2.to_out.0.bias": { |
| "min": -0.08873512595891953, |
| "max": 0.09103881567716599, |
| "mean": 0.0022877324372529984, |
| "std": 0.019517814740538597, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.0.3.g": { |
| "min": 0.2668094336986542, |
| "max": 1.0562759637832642, |
| "mean": 0.5312086343765259, |
| "std": 0.10443899780511856, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.0.4.ff.0.0.weight": { |
| "min": -0.5745095610618591, |
| "max": 0.6083298325538635, |
| "mean": -0.0004305951879359782, |
| "std": 0.038600798696279526, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.0.4.ff.0.0.bias": { |
| "min": -0.1827721893787384, |
| "max": 0.04561286419630051, |
| "mean": -0.029457518830895424, |
| "std": 0.042618319392204285, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.0.4.ff.2.weight": { |
| "min": -1.167069435119629, |
| "max": 1.6338956356048584, |
| "mean": 0.0003232666349504143, |
| "std": 0.02769671194255352, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.0.4.ff.2.bias": { |
| "min": -0.1623232066631317, |
| "max": 0.20567050576210022, |
| "mean": -0.021127892658114433, |
| "std": 0.027942020446062088, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.1.1.g": { |
| "min": 0.22407177090644836, |
| "max": 0.843936026096344, |
| "mean": 0.4876656234264374, |
| "std": 0.07522594183683395, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.1.2.to_q.weight": { |
| "min": -0.2555537223815918, |
| "max": 0.3058427572250366, |
| "mean": -6.734902854077518e-06, |
| "std": 0.033475104719400406, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.1.2.to_q.bias": { |
| "min": -0.09539701044559479, |
| "max": 0.11051826924085617, |
| "mean": 6.649381248280406e-05, |
| "std": 0.026965470984578133, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.1.2.to_k.weight": { |
| "min": -0.2971154749393463, |
| "max": 0.2961341142654419, |
| "mean": 5.3386003855848685e-05, |
| "std": 0.03254621848464012, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.1.2.to_k.bias": { |
| "min": -5.165225028991699, |
| "max": 5.085448741912842, |
| "mean": -0.014597215689718723, |
| "std": 1.1575956344604492, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.1.2.to_v.weight": { |
| "min": -0.3449501693248749, |
| "max": 0.3433416187763214, |
| "mean": 7.857720629544929e-05, |
| "std": 0.030061962082982063, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.1.2.to_v.bias": { |
| "min": -0.03606901317834854, |
| "max": 0.033370036631822586, |
| "mean": -0.0001412129495292902, |
| "std": 0.01303885132074356, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.1.2.to_out.0.weight": { |
| "min": -0.3154986798763275, |
| "max": 0.37501832842826843, |
| "mean": -2.0688352378783748e-05, |
| "std": 0.024059457704424858, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.1.2.to_out.0.bias": { |
| "min": -0.1054358258843422, |
| "max": 0.12218254804611206, |
| "mean": -0.001968180760741234, |
| "std": 0.02885930798947811, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.1.3.g": { |
| "min": 0.3115288317203522, |
| "max": 1.1208443641662598, |
| "mean": 0.6663118004798889, |
| "std": 0.09773967415094376, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.1.4.ff.0.0.weight": { |
| "min": -0.8727405071258545, |
| "max": 0.6275568604469299, |
| "mean": 0.001675269566476345, |
| "std": 0.04743880778551102, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.1.4.ff.0.0.bias": { |
| "min": -0.27153271436691284, |
| "max": 0.034265656024217606, |
| "mean": -0.04660956189036369, |
| "std": 0.04060109704732895, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.1.4.ff.2.weight": { |
| "min": -0.9227067232131958, |
| "max": 0.9646649360656738, |
| "mean": 0.0010214094072580338, |
| "std": 0.04070667922496796, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.1.4.ff.2.bias": { |
| "min": -0.14452314376831055, |
| "max": 0.0749678835272789, |
| "mean": -0.009091369807720184, |
| "std": 0.025692423805594444, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.2.1.g": { |
| "min": 0.2401818335056305, |
| "max": 0.7130386829376221, |
| "mean": 0.4472571313381195, |
| "std": 0.05933048576116562, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.2.2.to_q.weight": { |
| "min": -0.27240708470344543, |
| "max": 0.2978667914867401, |
| "mean": 9.335752110928297e-06, |
| "std": 0.03546963632106781, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.2.2.to_q.bias": { |
| "min": -0.11937365680932999, |
| "max": 0.11856595426797867, |
| "mean": 0.0007609212771058083, |
| "std": 0.027630653232336044, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.2.2.to_k.weight": { |
| "min": -0.2809975743293762, |
| "max": 0.2798910439014435, |
| "mean": -7.717408880125731e-05, |
| "std": 0.03509914502501488, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.2.2.to_k.bias": { |
| "min": -2.5100622177124023, |
| "max": 2.5220582485198975, |
| "mean": 0.026752006262540817, |
| "std": 0.5868890285491943, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.2.2.to_v.weight": { |
| "min": -0.2211453914642334, |
| "max": 0.2715946435928345, |
| "mean": 2.9373950383160263e-06, |
| "std": 0.030732743442058563, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.2.2.to_v.bias": { |
| "min": -0.03357521444559097, |
| "max": 0.031258679926395416, |
| "mean": 0.00011264161730650812, |
| "std": 0.012410733848810196, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.2.2.to_out.0.weight": { |
| "min": -0.235328808426857, |
| "max": 0.23169946670532227, |
| "mean": 5.690910984412767e-05, |
| "std": 0.025696253404021263, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.2.2.to_out.0.bias": { |
| "min": -0.1358632743358612, |
| "max": 0.1274021714925766, |
| "mean": -0.005497328005731106, |
| "std": 0.03996951878070831, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.2.3.g": { |
| "min": 0.3545131981372833, |
| "max": 1.172075629234314, |
| "mean": 0.7106390595436096, |
| "std": 0.10376753658056259, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.2.4.ff.0.0.weight": { |
| "min": -0.6174606084823608, |
| "max": 0.5543855428695679, |
| "mean": 0.0011602300219237804, |
| "std": 0.04611969366669655, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.2.4.ff.0.0.bias": { |
| "min": -0.18855658173561096, |
| "max": 0.024964194744825363, |
| "mean": -0.034842122346162796, |
| "std": 0.02861381322145462, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.2.4.ff.2.weight": { |
| "min": -1.1317338943481445, |
| "max": 0.9715229272842407, |
| "mean": 0.00035948510048910975, |
| "std": 0.04234746843576431, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.2.4.ff.2.bias": { |
| "min": -0.5981062650680542, |
| "max": 0.06280992925167084, |
| "mean": -0.004879314452409744, |
| "std": 0.028617065399885178, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.3.1.g": { |
| "min": 0.3752063512802124, |
| "max": 0.940569281578064, |
| "mean": 0.5925507545471191, |
| "std": 0.06694991141557693, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.3.2.to_q.weight": { |
| "min": -0.39141029119491577, |
| "max": 0.3690900504589081, |
| "mean": 7.122607348719612e-05, |
| "std": 0.03718871995806694, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.3.2.to_q.bias": { |
| "min": -0.11894620954990387, |
| "max": 0.13650599122047424, |
| "mean": 0.0009305156418122351, |
| "std": 0.029250090941786766, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.3.2.to_k.weight": { |
| "min": -0.6192548274993896, |
| "max": 0.5089151263237, |
| "mean": 1.523251921753399e-05, |
| "std": 0.03644222766160965, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.3.2.to_k.bias": { |
| "min": -8.188663482666016, |
| "max": 8.790773391723633, |
| "mean": -0.10929425060749054, |
| "std": 1.6991606950759888, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.3.2.to_v.weight": { |
| "min": -0.27665913105010986, |
| "max": 0.23989883065223694, |
| "mean": 5.3170409955782816e-05, |
| "std": 0.03261546045541763, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.3.2.to_v.bias": { |
| "min": -0.05207620561122894, |
| "max": 0.039528362452983856, |
| "mean": 9.136732842307538e-05, |
| "std": 0.012959755957126617, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.3.2.to_out.0.weight": { |
| "min": -0.23080551624298096, |
| "max": 0.23467440903186798, |
| "mean": -2.1718551579397172e-05, |
| "std": 0.0293918177485466, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.3.2.to_out.0.bias": { |
| "min": -0.20433980226516724, |
| "max": 0.10561156272888184, |
| "mean": -0.0040257819928228855, |
| "std": 0.03262433037161827, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.3.3.g": { |
| "min": 0.3398168385028839, |
| "max": 1.0127116441726685, |
| "mean": 0.7008739709854126, |
| "std": 0.09675976634025574, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.3.4.ff.0.0.weight": { |
| "min": -0.5649558305740356, |
| "max": 0.8329834342002869, |
| "mean": 0.00041514058830216527, |
| "std": 0.04230239987373352, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.3.4.ff.0.0.bias": { |
| "min": -0.21168796718120575, |
| "max": 0.030586589127779007, |
| "mean": -0.03219006583094597, |
| "std": 0.02651149593293667, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.3.4.ff.2.weight": { |
| "min": -0.7545908689498901, |
| "max": 0.7186294794082642, |
| "mean": -9.42062251851894e-06, |
| "std": 0.036842189729213715, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.3.4.ff.2.bias": { |
| "min": -0.26354482769966125, |
| "max": 0.10587722808122635, |
| "mean": -0.0030317441560328007, |
| "std": 0.028866499662399292, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.4.1.g": { |
| "min": 0.28444212675094604, |
| "max": 0.695132315158844, |
| "mean": 0.49955570697784424, |
| "std": 0.04653683677315712, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.4.2.to_q.weight": { |
| "min": -0.27924680709838867, |
| "max": 0.2342948317527771, |
| "mean": -0.00011125784658361226, |
| "std": 0.03876316547393799, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.4.2.to_q.bias": { |
| "min": -0.1545136421918869, |
| "max": 0.12684346735477448, |
| "mean": -0.002232692204415798, |
| "std": 0.03341302275657654, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.4.2.to_k.weight": { |
| "min": -0.41413962841033936, |
| "max": 0.6599588990211487, |
| "mean": -1.9788125428021885e-05, |
| "std": 0.03910021111369133, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.4.2.to_k.bias": { |
| "min": -4.238841533660889, |
| "max": 4.723404884338379, |
| "mean": -0.02046296000480652, |
| "std": 1.0078750848770142, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.4.2.to_v.weight": { |
| "min": -0.245038241147995, |
| "max": 0.20766045153141022, |
| "mean": 4.384694329928607e-05, |
| "std": 0.03396622836589813, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.4.2.to_v.bias": { |
| "min": -0.034554872661828995, |
| "max": 0.04480086266994476, |
| "mean": -1.7740559997037053e-05, |
| "std": 0.012627062387764454, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.4.2.to_out.0.weight": { |
| "min": -0.20076899230480194, |
| "max": 0.20593972504138947, |
| "mean": -2.9633309168275446e-05, |
| "std": 0.031023768708109856, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.4.2.to_out.0.bias": { |
| "min": -0.1999690979719162, |
| "max": 0.11344368755817413, |
| "mean": -0.0029194147791713476, |
| "std": 0.034512441605329514, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.4.3.g": { |
| "min": 0.3670799434185028, |
| "max": 1.056976079940796, |
| "mean": 0.67062908411026, |
| "std": 0.06638980656862259, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.4.4.ff.0.0.weight": { |
| "min": -0.39831405878067017, |
| "max": 0.5025192499160767, |
| "mean": -3.858314084936865e-05, |
| "std": 0.04113723710179329, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.4.4.ff.0.0.bias": { |
| "min": -0.12804804742336273, |
| "max": 0.026756688952445984, |
| "mean": -0.030546799302101135, |
| "std": 0.021871846169233322, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.4.4.ff.2.weight": { |
| "min": -0.4490903913974762, |
| "max": 0.4329609274864197, |
| "mean": 8.376075129490346e-05, |
| "std": 0.034896120429039, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.4.4.ff.2.bias": { |
| "min": -0.26764214038848877, |
| "max": 0.07259879261255264, |
| "mean": -0.0011110607301816344, |
| "std": 0.023125821724534035, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.5.1.g": { |
| "min": 0.28748002648353577, |
| "max": 0.68532794713974, |
| "mean": 0.5245869159698486, |
| "std": 0.047536663711071014, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.5.2.to_q.weight": { |
| "min": -0.22228431701660156, |
| "max": 0.22351308166980743, |
| "mean": 1.5719435396022163e-05, |
| "std": 0.03895285725593567, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.5.2.to_q.bias": { |
| "min": -0.13649071753025055, |
| "max": 0.10923465341329575, |
| "mean": 0.00023689989757258445, |
| "std": 0.029244115576148033, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.5.2.to_k.weight": { |
| "min": -0.37521880865097046, |
| "max": 0.43729540705680847, |
| "mean": -9.554900316288695e-06, |
| "std": 0.03928901627659798, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.5.2.to_k.bias": { |
| "min": -3.8464367389678955, |
| "max": 5.000250816345215, |
| "mean": 0.009746391326189041, |
| "std": 0.8453746438026428, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.5.2.to_v.weight": { |
| "min": -0.22334088385105133, |
| "max": 0.22010144591331482, |
| "mean": -2.237738954136148e-07, |
| "std": 0.03441348671913147, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.5.2.to_v.bias": { |
| "min": -0.04365158826112747, |
| "max": 0.035844866186380386, |
| "mean": -0.00025856425054371357, |
| "std": 0.012080752290785313, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.5.2.to_out.0.weight": { |
| "min": -0.21325859427452087, |
| "max": 0.1888350248336792, |
| "mean": -1.6756794138927944e-05, |
| "std": 0.03154024854302406, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.5.2.to_out.0.bias": { |
| "min": -0.1807885617017746, |
| "max": 0.1208307296037674, |
| "mean": -0.0024116605054587126, |
| "std": 0.04126964509487152, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.5.3.g": { |
| "min": 0.4224590063095093, |
| "max": 0.9420249462127686, |
| "mean": 0.6628004908561707, |
| "std": 0.05680832266807556, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.5.4.ff.0.0.weight": { |
| "min": -0.371380478143692, |
| "max": 0.4757322669029236, |
| "mean": -8.227255602832884e-05, |
| "std": 0.040896233171224594, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.5.4.ff.0.0.bias": { |
| "min": -0.20817440748214722, |
| "max": 0.027128340676426888, |
| "mean": -0.03024515137076378, |
| "std": 0.021346455439925194, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.5.4.ff.2.weight": { |
| "min": -0.34020015597343445, |
| "max": 0.7336611747741699, |
| "mean": 8.482092380290851e-05, |
| "std": 0.03477148711681366, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.5.4.ff.2.bias": { |
| "min": -0.2402409464120865, |
| "max": 0.05044962465763092, |
| "mean": -0.0011967722093686461, |
| "std": 0.020463695749640465, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.6.1.g": { |
| "min": 0.3060604929924011, |
| "max": 0.6536474823951721, |
| "mean": 0.525157630443573, |
| "std": 0.04612673819065094, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.6.2.to_q.weight": { |
| "min": -0.30424413084983826, |
| "max": 0.2173623889684677, |
| "mean": 6.994098657742143e-05, |
| "std": 0.03949854522943497, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.6.2.to_q.bias": { |
| "min": -0.14945130050182343, |
| "max": 0.13143886625766754, |
| "mean": 0.00034817858249880373, |
| "std": 0.030476493760943413, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.6.2.to_k.weight": { |
| "min": -0.2574465572834015, |
| "max": 0.20223106443881989, |
| "mean": 3.098994420724921e-05, |
| "std": 0.03948768228292465, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.6.2.to_k.bias": { |
| "min": -2.336733102798462, |
| "max": 2.376356840133667, |
| "mean": -0.02624763362109661, |
| "std": 0.44985321164131165, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.6.2.to_v.weight": { |
| "min": -0.18909630179405212, |
| "max": 0.21054214239120483, |
| "mean": 3.723270128830336e-05, |
| "std": 0.034798216074705124, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.6.2.to_v.bias": { |
| "min": -0.03172660619020462, |
| "max": 0.03550007939338684, |
| "mean": -0.00020049612794537097, |
| "std": 0.012289649806916714, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.6.2.to_out.0.weight": { |
| "min": -0.1884716898202896, |
| "max": 0.17050357162952423, |
| "mean": -6.797749665565789e-05, |
| "std": 0.03217477723956108, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.6.2.to_out.0.bias": { |
| "min": -0.1394048035144806, |
| "max": 0.13731525838375092, |
| "mean": -0.0025170280132442713, |
| "std": 0.05131148546934128, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.6.3.g": { |
| "min": 0.46712788939476013, |
| "max": 0.9565918445587158, |
| "mean": 0.6689888834953308, |
| "std": 0.052790068089962006, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.6.4.ff.0.0.weight": { |
| "min": -0.32436564564704895, |
| "max": 0.3097445070743561, |
| "mean": -1.5296809579012915e-06, |
| "std": 0.04095211252570152, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.6.4.ff.0.0.bias": { |
| "min": -0.12481985241174698, |
| "max": 0.02530287392437458, |
| "mean": -0.030714336782693863, |
| "std": 0.019815392792224884, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.6.4.ff.2.weight": { |
| "min": -0.44007495045661926, |
| "max": 0.44524118304252625, |
| "mean": 9.531660907668993e-05, |
| "std": 0.03512417897582054, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.6.4.ff.2.bias": { |
| "min": -0.22461570799350739, |
| "max": 0.05165664851665497, |
| "mean": -0.0011837758356705308, |
| "std": 0.018468836322426796, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.7.1.g": { |
| "min": 0.33936041593551636, |
| "max": 0.7393229007720947, |
| "mean": 0.5587522983551025, |
| "std": 0.04140261933207512, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.7.2.to_q.weight": { |
| "min": -0.27253732085227966, |
| "max": 0.2784145176410675, |
| "mean": 1.9914490621886216e-05, |
| "std": 0.041062433272600174, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.7.2.to_q.bias": { |
| "min": -0.13705013692378998, |
| "max": 0.13989973068237305, |
| "mean": 0.0004888542462140322, |
| "std": 0.02663799747824669, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.7.2.to_k.weight": { |
| "min": -0.49079182744026184, |
| "max": 0.35604262351989746, |
| "mean": 8.881442772690207e-05, |
| "std": 0.04070043936371803, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.7.2.to_k.bias": { |
| "min": -2.2975404262542725, |
| "max": 1.7454535961151123, |
| "mean": -0.021080955862998962, |
| "std": 0.5002180933952332, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.7.2.to_v.weight": { |
| "min": -0.21756696701049805, |
| "max": 0.19789846241474152, |
| "mean": -4.058882768731564e-05, |
| "std": 0.03423743322491646, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.7.2.to_v.bias": { |
| "min": -0.04133187234401703, |
| "max": 0.03867634758353233, |
| "mean": -0.00014505762374028563, |
| "std": 0.012880876660346985, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.7.2.to_out.0.weight": { |
| "min": -0.1775415539741516, |
| "max": 0.18375425040721893, |
| "mean": 4.7608955355826765e-05, |
| "std": 0.03156036138534546, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.7.2.to_out.0.bias": { |
| "min": -0.17991603910923004, |
| "max": 0.18388697504997253, |
| "mean": -0.0022191007155925035, |
| "std": 0.05484011396765709, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.7.3.g": { |
| "min": 0.4743064045906067, |
| "max": 1.0255905389785767, |
| "mean": 0.6453731656074524, |
| "std": 0.050350919365882874, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.7.4.ff.0.0.weight": { |
| "min": -0.2718814015388489, |
| "max": 0.30937331914901733, |
| "mean": 0.00011242884647799656, |
| "std": 0.04068846255540848, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.7.4.ff.0.0.bias": { |
| "min": -0.1058216467499733, |
| "max": 0.026849187910556793, |
| "mean": -0.029516499489545822, |
| "std": 0.01792926900088787, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.7.4.ff.2.weight": { |
| "min": -0.33906012773513794, |
| "max": 0.3292734920978546, |
| "mean": 5.717227759305388e-05, |
| "std": 0.034418120980262756, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.7.4.ff.2.bias": { |
| "min": -0.18169447779655457, |
| "max": 0.04204929992556572, |
| "mean": -0.0010728895431384444, |
| "std": 0.01721538044512272, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.8.1.g": { |
| "min": 0.32545599341392517, |
| "max": 0.686664342880249, |
| "mean": 0.5112766027450562, |
| "std": 0.036954235285520554, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.8.2.to_q.weight": { |
| "min": -0.23384402692317963, |
| "max": 0.22573164105415344, |
| "mean": -3.598508192226291e-05, |
| "std": 0.0391816683113575, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.8.2.to_q.bias": { |
| "min": -0.11534573137760162, |
| "max": 0.13162653148174286, |
| "mean": 0.0001513269089628011, |
| "std": 0.029193254187703133, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.8.2.to_k.weight": { |
| "min": -0.3528960049152374, |
| "max": 0.285469651222229, |
| "mean": 7.2757711677695625e-06, |
| "std": 0.03925016149878502, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.8.2.to_k.bias": { |
| "min": -4.133274078369141, |
| "max": 3.544353723526001, |
| "mean": -0.011593645438551903, |
| "std": 0.6827419400215149, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.8.2.to_v.weight": { |
| "min": -0.21140114963054657, |
| "max": 0.20909518003463745, |
| "mean": 3.4737786336336285e-05, |
| "std": 0.0344894602894783, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.8.2.to_v.bias": { |
| "min": -0.035711076110601425, |
| "max": 0.048078615218400955, |
| "mean": 0.0007944396347738802, |
| "std": 0.01285555586218834, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.8.2.to_out.0.weight": { |
| "min": -0.21062366664409637, |
| "max": 0.193213552236557, |
| "mean": -1.284678091906244e-06, |
| "std": 0.031699951738119125, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.8.2.to_out.0.bias": { |
| "min": -0.18667221069335938, |
| "max": 0.17721369862556458, |
| "mean": -0.002848550211638212, |
| "std": 0.058637380599975586, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.8.3.g": { |
| "min": 0.4746397137641907, |
| "max": 1.041860818862915, |
| "mean": 0.651482880115509, |
| "std": 0.049657855182886124, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.8.4.ff.0.0.weight": { |
| "min": -0.24850571155548096, |
| "max": 0.32913738489151, |
| "mean": 0.00018063749303109944, |
| "std": 0.04057687148451805, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.8.4.ff.0.0.bias": { |
| "min": -0.12447232753038406, |
| "max": 0.024594351649284363, |
| "mean": -0.030502719804644585, |
| "std": 0.01760093867778778, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.8.4.ff.2.weight": { |
| "min": -0.4212387502193451, |
| "max": 0.482032835483551, |
| "mean": 2.141768618457718e-06, |
| "std": 0.03540309891104698, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.8.4.ff.2.bias": { |
| "min": -0.15185940265655518, |
| "max": 0.04337269812822342, |
| "mean": 3.945987918996252e-05, |
| "std": 0.014877513982355595, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.9.1.g": { |
| "min": 0.31561797857284546, |
| "max": 0.682021975517273, |
| "mean": 0.5529669523239136, |
| "std": 0.04071478173136711, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.9.2.to_q.weight": { |
| "min": -0.20643697679042816, |
| "max": 0.21993368864059448, |
| "mean": 3.0923340091248974e-05, |
| "std": 0.03830339014530182, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.9.2.to_q.bias": { |
| "min": -0.1378619521856308, |
| "max": 0.112775057554245, |
| "mean": 2.049036993412301e-05, |
| "std": 0.02582140639424324, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.9.2.to_k.weight": { |
| "min": -0.40277066826820374, |
| "max": 0.3711613118648529, |
| "mean": 2.6232244636048563e-05, |
| "std": 0.038185227662324905, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.9.2.to_k.bias": { |
| "min": -3.7714638710021973, |
| "max": 2.8691656589508057, |
| "mean": 0.0011571794748306274, |
| "std": 0.516919732093811, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.9.2.to_v.weight": { |
| "min": -0.20294280350208282, |
| "max": 0.1974332332611084, |
| "mean": 2.9497665309463628e-05, |
| "std": 0.03430052474141121, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.9.2.to_v.bias": { |
| "min": -0.050981007516384125, |
| "max": 0.04004063457250595, |
| "mean": -0.0004196166410110891, |
| "std": 0.013425874523818493, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.9.2.to_out.0.weight": { |
| "min": -0.19651710987091064, |
| "max": 0.2017611861228943, |
| "mean": -1.2331822290434502e-05, |
| "std": 0.031808242201805115, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.9.2.to_out.0.bias": { |
| "min": -0.19314663112163544, |
| "max": 0.19513675570487976, |
| "mean": -0.0029698254074901342, |
| "std": 0.06256996840238571, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.9.3.g": { |
| "min": 0.3494587540626526, |
| "max": 1.0840725898742676, |
| "mean": 0.6672499775886536, |
| "std": 0.05523226782679558, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.9.4.ff.0.0.weight": { |
| "min": -0.22516681253910065, |
| "max": 0.2514885663986206, |
| "mean": 0.00035906361881643534, |
| "std": 0.040765900164842606, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.9.4.ff.0.0.bias": { |
| "min": -0.09103509038686752, |
| "max": 0.04371785372495651, |
| "mean": -0.030089743435382843, |
| "std": 0.017607875168323517, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.9.4.ff.2.weight": { |
| "min": -0.35351207852363586, |
| "max": 0.30409130454063416, |
| "mean": -4.350150265963748e-05, |
| "std": 0.03712816908955574, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.9.4.ff.2.bias": { |
| "min": -0.16206228733062744, |
| "max": 0.06353683769702911, |
| "mean": -8.305630763061345e-05, |
| "std": 0.019406888633966446, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.10.1.g": { |
| "min": 0.348787397146225, |
| "max": 0.722071647644043, |
| "mean": 0.5424383878707886, |
| "std": 0.039067838340997696, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.10.2.to_q.weight": { |
| "min": -0.21942198276519775, |
| "max": 0.22312530875205994, |
| "mean": -1.1118878319393843e-05, |
| "std": 0.03923613205552101, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.10.2.to_q.bias": { |
| "min": -0.11845415830612183, |
| "max": 0.1708553582429886, |
| "mean": 0.0002840349334292114, |
| "std": 0.025122985243797302, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.10.2.to_k.weight": { |
| "min": -0.24687451124191284, |
| "max": 0.301123708486557, |
| "mean": -3.652745726867579e-05, |
| "std": 0.038935691118240356, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.10.2.to_k.bias": { |
| "min": -3.5055902004241943, |
| "max": 3.715036153793335, |
| "mean": 0.01585160195827484, |
| "std": 0.7825287580490112, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.10.2.to_v.weight": { |
| "min": -0.2186805158853531, |
| "max": 0.23763009905815125, |
| "mean": -1.3581981875177007e-05, |
| "std": 0.036307912319898605, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.10.2.to_v.bias": { |
| "min": -0.047199200838804245, |
| "max": 0.05141306668519974, |
| "mean": 0.0004809980746358633, |
| "std": 0.013516527600586414, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.10.2.to_out.0.weight": { |
| "min": -0.21401917934417725, |
| "max": 0.21761927008628845, |
| "mean": 5.652284016832709e-05, |
| "std": 0.03361988440155983, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.10.2.to_out.0.bias": { |
| "min": -0.21142059564590454, |
| "max": 0.23152688145637512, |
| "mean": -0.005106795579195023, |
| "std": 0.061881836503744125, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.10.3.g": { |
| "min": 0.36215895414352417, |
| "max": 1.1013121604919434, |
| "mean": 0.6993671655654907, |
| "std": 0.05360371619462967, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.10.4.ff.0.0.weight": { |
| "min": -0.2346053123474121, |
| "max": 0.24489951133728027, |
| "mean": 0.000463481672341004, |
| "std": 0.0412747748196125, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.10.4.ff.0.0.bias": { |
| "min": -0.09809241443872452, |
| "max": 0.06830352544784546, |
| "mean": -0.031439535319805145, |
| "std": 0.01812061481177807, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.10.4.ff.2.weight": { |
| "min": -0.3016868829727173, |
| "max": 0.35154613852500916, |
| "mean": -8.162677113432437e-05, |
| "std": 0.040280576795339584, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.10.4.ff.2.bias": { |
| "min": -0.15234576165676117, |
| "max": 0.14968463778495789, |
| "mean": 0.00025512842694297433, |
| "std": 0.023036863654851913, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.11.1.g": { |
| "min": 0.99940425157547, |
| "max": 1.0017729997634888, |
| "mean": 1.0002546310424805, |
| "std": 0.0006659556529484689, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.11.2.to_q.weight": { |
| "min": -0.03126639127731323, |
| "max": 0.03126263990998268, |
| "mean": -1.9294351659482345e-05, |
| "std": 0.018044061958789825, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.11.2.to_q.bias": { |
| "min": -0.031232889741659164, |
| "max": 0.03099249303340912, |
| "mean": -0.001084338640794158, |
| "std": 0.017953665927052498, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.11.2.to_k.weight": { |
| "min": -0.031263306736946106, |
| "max": 0.031267084181308746, |
| "mean": 3.548892891558353e-06, |
| "std": 0.018044468015432358, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.11.2.to_k.bias": { |
| "min": -0.03115880861878395, |
| "max": 0.031179169192910194, |
| "mean": 0.0003339822869747877, |
| "std": 0.018065886572003365, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.11.2.to_v.weight": { |
| "min": 0.0, |
| "max": 0.0, |
| "mean": 0.0, |
| "std": 0.0, |
| "sparsity": 1.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.11.2.to_v.bias": { |
| "min": 0.0, |
| "max": 0.0, |
| "mean": 0.0, |
| "std": 0.0, |
| "sparsity": 1.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.11.2.to_out.0.weight": { |
| "min": 0.0, |
| "max": 0.0, |
| "mean": 0.0, |
| "std": 0.0, |
| "sparsity": 1.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.11.2.to_out.0.bias": { |
| "min": 0.0, |
| "max": 0.0, |
| "mean": 0.0, |
| "std": 0.0, |
| "sparsity": 1.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.11.3.g": { |
| "min": 0.9994449615478516, |
| "max": 1.0018095970153809, |
| "mean": 1.0002632141113281, |
| "std": 0.0006522060139104724, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.11.4.ff.0.0.weight": { |
| "min": -0.03126963973045349, |
| "max": 0.03127080947160721, |
| "mean": -8.397149031225126e-06, |
| "std": 0.01804318279027939, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.11.4.ff.0.0.bias": { |
| "min": -0.0312512069940567, |
| "max": 0.031249327585101128, |
| "mean": 0.0001536280324216932, |
| "std": 0.01799430511891842, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.11.4.ff.2.weight": { |
| "min": 0.0, |
| "max": 0.0, |
| "mean": 0.0, |
| "std": 0.0, |
| "sparsity": 1.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.11.4.ff.2.bias": { |
| "min": 0.0, |
| "max": 0.0, |
| "mean": 0.0, |
| "std": 0.0, |
| "sparsity": 1.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.12.1.g": { |
| "min": 0.38297948241233826, |
| "max": 0.7195636034011841, |
| "mean": 0.5807591080665588, |
| "std": 0.03886506333947182, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.12.2.to_q.weight": { |
| "min": -0.2380739152431488, |
| "max": 0.19658486545085907, |
| "mean": 2.6584548322716728e-05, |
| "std": 0.03746968135237694, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.12.2.to_q.bias": { |
| "min": -0.11867813766002655, |
| "max": 0.16608171164989471, |
| "mean": 0.0009910191874951124, |
| "std": 0.02755763940513134, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.12.2.to_k.weight": { |
| "min": -0.2461400330066681, |
| "max": 0.5007420182228088, |
| "mean": -5.0447401008568704e-05, |
| "std": 0.03762757405638695, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.12.2.to_k.bias": { |
| "min": -3.9424328804016113, |
| "max": 3.7695746421813965, |
| "mean": -0.0035724048502743244, |
| "std": 0.681464672088623, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.12.2.to_v.weight": { |
| "min": -0.22735856473445892, |
| "max": 0.2514454424381256, |
| "mean": -1.1598500350373797e-05, |
| "std": 0.03743908926844597, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.12.2.to_v.bias": { |
| "min": -0.07171762734651566, |
| "max": 0.08069814741611481, |
| "mean": -0.0005200206069275737, |
| "std": 0.015662606805562973, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.12.2.to_out.0.weight": { |
| "min": -0.2281658798456192, |
| "max": 0.2580048441886902, |
| "mean": -2.8616894269362092e-05, |
| "std": 0.03542575612664223, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.12.2.to_out.0.bias": { |
| "min": -0.20041774213314056, |
| "max": 0.2152491807937622, |
| "mean": -0.005537157878279686, |
| "std": 0.06833865493535995, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.12.3.g": { |
| "min": 0.4051746428012848, |
| "max": 1.1894384622573853, |
| "mean": 0.7380443215370178, |
| "std": 0.05523209273815155, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.12.4.ff.0.0.weight": { |
| "min": -0.2211739420890808, |
| "max": 0.2460654377937317, |
| "mean": 0.0005211163079366088, |
| "std": 0.04134252667427063, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.12.4.ff.0.0.bias": { |
| "min": -0.10346150398254395, |
| "max": 0.024183176457881927, |
| "mean": -0.03266960382461548, |
| "std": 0.018883610144257545, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.12.4.ff.2.weight": { |
| "min": -0.4493599832057953, |
| "max": 0.42234691977500916, |
| "mean": -0.0004324695619288832, |
| "std": 0.046903885900974274, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.12.4.ff.2.bias": { |
| "min": -0.25148940086364746, |
| "max": 0.47015321254730225, |
| "mean": 0.0031974762678146362, |
| "std": 0.044545728713274, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.13.0.weight": { |
| "min": -0.3170487582683563, |
| "max": 0.33324581384658813, |
| "mean": -2.528912045818288e-05, |
| "std": 0.0212908573448658, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 2048 |
| ] |
| }, |
| "transformer.layers.13.1.g": { |
| "min": 0.3246053457260132, |
| "max": 0.6854332685470581, |
| "mean": 0.5710639357566833, |
| "std": 0.04471997916698456, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.13.2.to_q.weight": { |
| "min": -0.16466176509857178, |
| "max": 0.1740393489599228, |
| "mean": -4.8587571654934436e-05, |
| "std": 0.03318466991186142, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.13.2.to_q.bias": { |
| "min": -0.18687166273593903, |
| "max": 0.14292190968990326, |
| "mean": 3.81053687306121e-05, |
| "std": 0.029696526005864143, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.13.2.to_k.weight": { |
| "min": -0.38059744238853455, |
| "max": 0.24608764052391052, |
| "mean": -9.966568541130982e-06, |
| "std": 0.032765913754701614, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.13.2.to_k.bias": { |
| "min": -3.65606689453125, |
| "max": 3.290353775024414, |
| "mean": -0.014253877103328705, |
| "std": 0.9852582216262817, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.13.2.to_v.weight": { |
| "min": -0.23507949709892273, |
| "max": 0.2475711703300476, |
| "mean": -1.77873171196552e-05, |
| "std": 0.041702862828969955, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.13.2.to_v.bias": { |
| "min": -0.07279693335294724, |
| "max": 0.15454502403736115, |
| "mean": 0.000664762279484421, |
| "std": 0.025170980021357536, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.13.2.to_out.0.weight": { |
| "min": -0.2665710747241974, |
| "max": 0.24850338697433472, |
| "mean": -1.535093724669423e-05, |
| "std": 0.04014323651790619, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.13.2.to_out.0.bias": { |
| "min": -0.18962323665618896, |
| "max": 0.19475142657756805, |
| "mean": -0.0012306260177865624, |
| "std": 0.06669402867555618, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.13.3.g": { |
| "min": 0.3292522728443146, |
| "max": 0.999567985534668, |
| "mean": 0.7192600965499878, |
| "std": 0.052342262119054794, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.13.4.ff.0.0.weight": { |
| "min": -0.23160076141357422, |
| "max": 0.2457643449306488, |
| "mean": 0.00018272445595357567, |
| "std": 0.04090625420212746, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.13.4.ff.0.0.bias": { |
| "min": -0.11420896649360657, |
| "max": 0.018650474026799202, |
| "mean": -0.042482297867536545, |
| "std": 0.018855074420571327, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.13.4.ff.2.weight": { |
| "min": -0.3899804949760437, |
| "max": 0.40730100870132446, |
| "mean": -2.1874793674214743e-05, |
| "std": 0.04854067414999008, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.13.4.ff.2.bias": { |
| "min": -0.6932750344276428, |
| "max": 0.41266557574272156, |
| "mean": 0.0008518121903762221, |
| "std": 0.060295384377241135, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.14.0.weight": { |
| "min": 0.0, |
| "max": 0.999998927116394, |
| "mean": 0.00048828075523488224, |
| "std": 0.02209167368710041, |
| "sparsity": 0.99951171875, |
| "shape": [ |
| 1024, |
| 2048 |
| ] |
| }, |
| "transformer.layers.14.1.g": { |
| "min": 0.9994292855262756, |
| "max": 1.0017839670181274, |
| "mean": 1.000253677368164, |
| "std": 0.000652652932330966, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.14.2.to_q.weight": { |
| "min": -0.03126111254096031, |
| "max": 0.0312650129199028, |
| "mean": -2.1023370209150016e-05, |
| "std": 0.0180354006588459, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.14.2.to_q.bias": { |
| "min": -0.031219881027936935, |
| "max": 0.031236713752150536, |
| "mean": -0.0006771213375031948, |
| "std": 0.017829909920692444, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.14.2.to_k.weight": { |
| "min": -0.03126417100429535, |
| "max": 0.03126959502696991, |
| "mean": -8.832794264890254e-06, |
| "std": 0.018034426495432854, |
| "sparsity": 9.5367431640625e-07, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.14.2.to_k.bias": { |
| "min": -0.03123662993311882, |
| "max": 0.03124932385981083, |
| "mean": -0.0007298794225789607, |
| "std": 0.01794484816491604, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.14.2.to_v.weight": { |
| "min": 0.0, |
| "max": 0.0, |
| "mean": 0.0, |
| "std": 0.0, |
| "sparsity": 1.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.14.2.to_v.bias": { |
| "min": 0.0, |
| "max": 0.0, |
| "mean": 0.0, |
| "std": 0.0, |
| "sparsity": 1.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.14.2.to_out.0.weight": { |
| "min": 0.0, |
| "max": 0.0, |
| "mean": 0.0, |
| "std": 0.0, |
| "sparsity": 1.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.14.2.to_out.0.bias": { |
| "min": 0.0, |
| "max": 0.0, |
| "mean": 0.0, |
| "std": 0.0, |
| "sparsity": 1.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.14.3.g": { |
| "min": 0.9993973970413208, |
| "max": 1.0017794370651245, |
| "mean": 1.00028395652771, |
| "std": 0.0006690355949103832, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.14.4.ff.0.0.weight": { |
| "min": -0.03126800060272217, |
| "max": 0.031265586614608765, |
| "mean": 3.591585482354276e-06, |
| "std": 0.018040791153907776, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.14.4.ff.0.0.bias": { |
| "min": -0.031230367720127106, |
| "max": 0.03125299513339996, |
| "mean": 0.00019574598991312087, |
| "std": 0.018076494336128235, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.14.4.ff.2.weight": { |
| "min": 0.0, |
| "max": 0.0, |
| "mean": 0.0, |
| "std": 0.0, |
| "sparsity": 1.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.14.4.ff.2.bias": { |
| "min": 0.0, |
| "max": 0.0, |
| "mean": 0.0, |
| "std": 0.0, |
| "sparsity": 1.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.15.0.weight": { |
| "min": -0.23457114398479462, |
| "max": 0.2725405693054199, |
| "mean": 6.967699391680071e-06, |
| "std": 0.01881221868097782, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 2048 |
| ] |
| }, |
| "transformer.layers.15.1.g": { |
| "min": 0.3212726414203644, |
| "max": 0.6936339139938354, |
| "mean": 0.5816882848739624, |
| "std": 0.04593805596232414, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.15.2.to_q.weight": { |
| "min": -0.18182046711444855, |
| "max": 0.1976739764213562, |
| "mean": -1.1725308468157891e-05, |
| "std": 0.033187251538038254, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.15.2.to_q.bias": { |
| "min": -0.1606890708208084, |
| "max": 0.12948599457740784, |
| "mean": -0.001067878445610404, |
| "std": 0.034144606441259384, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.15.2.to_k.weight": { |
| "min": -0.332189679145813, |
| "max": 0.31144458055496216, |
| "mean": -1.0352114259148948e-05, |
| "std": 0.03223797678947449, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.15.2.to_k.bias": { |
| "min": -7.803721904754639, |
| "max": 8.76359748840332, |
| "mean": 0.09347224235534668, |
| "std": 1.6197657585144043, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.15.2.to_v.weight": { |
| "min": -0.23381681740283966, |
| "max": 0.2420002520084381, |
| "mean": 4.138463191338815e-05, |
| "std": 0.04086202755570412, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.15.2.to_v.bias": { |
| "min": -0.07600986212491989, |
| "max": 0.06578930467367172, |
| "mean": 0.00047852861462160945, |
| "std": 0.019416049122810364, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.15.2.to_out.0.weight": { |
| "min": -0.24590720236301422, |
| "max": 0.23409155011177063, |
| "mean": -2.9138864192645997e-06, |
| "std": 0.039436690509319305, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.15.2.to_out.0.bias": { |
| "min": -0.16287560760974884, |
| "max": 0.16082623600959778, |
| "mean": 0.0016318459529429674, |
| "std": 0.06528104841709137, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.15.3.g": { |
| "min": 0.5568646192550659, |
| "max": 0.9439972043037415, |
| "mean": 0.7129673957824707, |
| "std": 0.0401376374065876, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.15.4.ff.0.0.weight": { |
| "min": -0.22865070402622223, |
| "max": 0.25514620542526245, |
| "mean": -4.54368710052222e-05, |
| "std": 0.04058137908577919, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.15.4.ff.0.0.bias": { |
| "min": -0.13480910658836365, |
| "max": 0.022281890735030174, |
| "mean": -0.04135727509856224, |
| "std": 0.018383679911494255, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.15.4.ff.2.weight": { |
| "min": -0.42169636487960815, |
| "max": 0.39239397644996643, |
| "mean": -4.40980693383608e-06, |
| "std": 0.04779108986258507, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.15.4.ff.2.bias": { |
| "min": -0.6075002551078796, |
| "max": 0.6514228582382202, |
| "mean": 0.0015837398823350668, |
| "std": 0.05683837831020355, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.16.0.weight": { |
| "min": -0.25171443819999695, |
| "max": 0.32070818543434143, |
| "mean": -6.0755610320484266e-06, |
| "std": 0.01961563341319561, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 2048 |
| ] |
| }, |
| "transformer.layers.16.1.g": { |
| "min": 0.3600234091281891, |
| "max": 0.6823956370353699, |
| "mean": 0.5707757472991943, |
| "std": 0.04296165704727173, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.16.2.to_q.weight": { |
| "min": -0.22057192027568817, |
| "max": 0.1770636886358261, |
| "mean": -3.4672062611207366e-05, |
| "std": 0.03430239111185074, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.16.2.to_q.bias": { |
| "min": -0.16365490853786469, |
| "max": 0.23306845128536224, |
| "mean": 0.0003636471228674054, |
| "std": 0.03286948427557945, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.16.2.to_k.weight": { |
| "min": -0.2637504041194916, |
| "max": 0.23983356356620789, |
| "mean": -5.237644290900789e-05, |
| "std": 0.03390154615044594, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.16.2.to_k.bias": { |
| "min": -4.8552327156066895, |
| "max": 5.091460227966309, |
| "mean": 0.04388175159692764, |
| "std": 1.2293211221694946, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.16.2.to_v.weight": { |
| "min": -0.2467021644115448, |
| "max": 0.2504825294017792, |
| "mean": 7.218097016448155e-05, |
| "std": 0.04399321228265762, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.16.2.to_v.bias": { |
| "min": -0.06264208257198334, |
| "max": 0.054531484842300415, |
| "mean": 0.00065071159042418, |
| "std": 0.017192156985402107, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.16.2.to_out.0.weight": { |
| "min": -0.2865971624851227, |
| "max": 0.2718464434146881, |
| "mean": -4.9919544835574925e-05, |
| "std": 0.04299159720540047, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.16.2.to_out.0.bias": { |
| "min": -0.16066378355026245, |
| "max": 0.17053070664405823, |
| "mean": -0.0028841430321335793, |
| "std": 0.059287648648023605, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.16.3.g": { |
| "min": 0.5196661353111267, |
| "max": 0.9328836798667908, |
| "mean": 0.7135858535766602, |
| "std": 0.038419246673583984, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.16.4.ff.0.0.weight": { |
| "min": -0.2381887435913086, |
| "max": 0.24951320886611938, |
| "mean": 0.00046486116480082273, |
| "std": 0.04046149179339409, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.16.4.ff.0.0.bias": { |
| "min": -0.14427022635936737, |
| "max": 0.041461389511823654, |
| "mean": -0.03969397395849228, |
| "std": 0.02054336480796337, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.16.4.ff.2.weight": { |
| "min": -0.5328505039215088, |
| "max": 0.5830832719802856, |
| "mean": 5.9098410929436795e-06, |
| "std": 0.04886835068464279, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.16.4.ff.2.bias": { |
| "min": -0.5191918015480042, |
| "max": 0.49353325366973877, |
| "mean": 0.0023602654691785574, |
| "std": 0.05344703048467636, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.17.0.weight": { |
| "min": -0.2736090123653412, |
| "max": 0.31520769000053406, |
| "mean": 1.8358268789597787e-06, |
| "std": 0.020052799955010414, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 2048 |
| ] |
| }, |
| "transformer.layers.17.1.g": { |
| "min": 0.36640509963035583, |
| "max": 0.711678147315979, |
| "mean": 0.593246340751648, |
| "std": 0.04593454673886299, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.17.2.to_q.weight": { |
| "min": -0.21106205880641937, |
| "max": 0.1996321678161621, |
| "mean": 3.077441579080187e-05, |
| "std": 0.03486856073141098, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.17.2.to_q.bias": { |
| "min": -0.18727192282676697, |
| "max": 0.20402666926383972, |
| "mean": 0.0009561080951243639, |
| "std": 0.031529128551483154, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.17.2.to_k.weight": { |
| "min": -0.28969451785087585, |
| "max": 0.3398367166519165, |
| "mean": -4.7392662963829935e-05, |
| "std": 0.03458969667553902, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.17.2.to_k.bias": { |
| "min": -3.877439260482788, |
| "max": 3.3875346183776855, |
| "mean": 0.014458965510129929, |
| "std": 0.8584734797477722, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.17.2.to_v.weight": { |
| "min": -0.2244323492050171, |
| "max": 0.24988871812820435, |
| "mean": -3.996262876171386e-06, |
| "std": 0.04223586246371269, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.17.2.to_v.bias": { |
| "min": -0.055074166506528854, |
| "max": 0.0468442440032959, |
| "mean": -1.8697581253945827e-05, |
| "std": 0.015848318114876747, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.17.2.to_out.0.weight": { |
| "min": -0.2929523289203644, |
| "max": 0.29100877046585083, |
| "mean": -7.363702025031671e-06, |
| "std": 0.04195086285471916, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.17.2.to_out.0.bias": { |
| "min": -0.12487897276878357, |
| "max": 0.2594272792339325, |
| "mean": -0.003234811592847109, |
| "std": 0.05315796285867691, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.17.3.g": { |
| "min": 0.45620009303092957, |
| "max": 0.844541609287262, |
| "mean": 0.7056601047515869, |
| "std": 0.035222552716732025, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.17.4.ff.0.0.weight": { |
| "min": -0.5114080309867859, |
| "max": 0.34850868582725525, |
| "mean": 0.00034260982647538185, |
| "std": 0.040206458419561386, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.17.4.ff.0.0.bias": { |
| "min": -0.18708936870098114, |
| "max": 0.03951717168092728, |
| "mean": -0.03939085826277733, |
| "std": 0.02134866826236248, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.17.4.ff.2.weight": { |
| "min": -0.544402539730072, |
| "max": 0.5565053224563599, |
| "mean": -7.180786633398384e-05, |
| "std": 0.05074291676282883, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.17.4.ff.2.bias": { |
| "min": -0.511856734752655, |
| "max": 0.6643833518028259, |
| "mean": 0.002446281723678112, |
| "std": 0.04952690377831459, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.18.0.weight": { |
| "min": -0.3324280381202698, |
| "max": 0.2657060921192169, |
| "mean": 3.681749149109237e-06, |
| "std": 0.01939038746058941, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 2048 |
| ] |
| }, |
| "transformer.layers.18.1.g": { |
| "min": 0.32228395342826843, |
| "max": 0.76633620262146, |
| "mean": 0.6510899662971497, |
| "std": 0.04530107229948044, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.18.2.to_q.weight": { |
| "min": -0.2495409995317459, |
| "max": 0.21955986320972443, |
| "mean": -2.516008862585295e-06, |
| "std": 0.03650251030921936, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.18.2.to_q.bias": { |
| "min": -0.3271917402744293, |
| "max": 0.2873159945011139, |
| "mean": -0.0006787859019823372, |
| "std": 0.03855893388390541, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.18.2.to_k.weight": { |
| "min": -0.3100964426994324, |
| "max": 0.3699168264865875, |
| "mean": 6.482247408712283e-05, |
| "std": 0.036243122071027756, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.18.2.to_k.bias": { |
| "min": -4.71769905090332, |
| "max": 5.807940483093262, |
| "mean": 0.03796037286520004, |
| "std": 1.4132623672485352, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.18.2.to_v.weight": { |
| "min": -0.22175297141075134, |
| "max": 0.20589375495910645, |
| "mean": -7.500311767216772e-05, |
| "std": 0.04249146580696106, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.18.2.to_v.bias": { |
| "min": -0.07759421318769455, |
| "max": 0.05135132744908333, |
| "mean": -0.000925259490031749, |
| "std": 0.016409944742918015, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.18.2.to_out.0.weight": { |
| "min": -0.33092743158340454, |
| "max": 0.3291303813457489, |
| "mean": -4.938564870826667e-06, |
| "std": 0.04279821738600731, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.18.2.to_out.0.bias": { |
| "min": -0.28501445055007935, |
| "max": 0.11160922050476074, |
| "mean": -0.0012059551663696766, |
| "std": 0.047013018280267715, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.18.3.g": { |
| "min": 0.4864731431007385, |
| "max": 0.8868119716644287, |
| "mean": 0.7375612854957581, |
| "std": 0.03823444992303848, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.18.4.ff.0.0.weight": { |
| "min": -0.36123231053352356, |
| "max": 0.2742029130458832, |
| "mean": 5.119089109939523e-05, |
| "std": 0.04065319895744324, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.18.4.ff.0.0.bias": { |
| "min": -0.24762794375419617, |
| "max": 0.046543918550014496, |
| "mean": -0.03927048668265343, |
| "std": 0.023254919797182083, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.18.4.ff.2.weight": { |
| "min": -0.6263269186019897, |
| "max": 0.5970423817634583, |
| "mean": -6.188904080772772e-05, |
| "std": 0.05312599986791611, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.18.4.ff.2.bias": { |
| "min": -0.709787905216217, |
| "max": 0.2658335268497467, |
| "mean": 0.0009195120073854923, |
| "std": 0.051235005259513855, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.19.0.weight": { |
| "min": -0.34334826469421387, |
| "max": 0.30343398451805115, |
| "mean": 2.1822438611707184e-07, |
| "std": 0.019139666110277176, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 2048 |
| ] |
| }, |
| "transformer.layers.19.1.g": { |
| "min": 0.34997785091400146, |
| "max": 0.7828695178031921, |
| "mean": 0.6389003992080688, |
| "std": 0.049218229949474335, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.19.2.to_q.weight": { |
| "min": -0.2058519572019577, |
| "max": 0.20681944489479065, |
| "mean": -5.9934332966804504e-05, |
| "std": 0.037698548287153244, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.19.2.to_q.bias": { |
| "min": -0.2586883008480072, |
| "max": 0.26840776205062866, |
| "mean": -0.0004055192694067955, |
| "std": 0.044631343334913254, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.19.2.to_k.weight": { |
| "min": -0.3542138934135437, |
| "max": 0.32258859276771545, |
| "mean": -7.339326657529455e-06, |
| "std": 0.037206824868917465, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.19.2.to_k.bias": { |
| "min": -5.261901378631592, |
| "max": 4.204929351806641, |
| "mean": -0.02642371505498886, |
| "std": 1.0068365335464478, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.19.2.to_v.weight": { |
| "min": -0.2388344258069992, |
| "max": 0.24378669261932373, |
| "mean": -2.555117680458352e-05, |
| "std": 0.0432158038020134, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.19.2.to_v.bias": { |
| "min": -0.06242268532514572, |
| "max": 0.0566251203417778, |
| "mean": 0.00035173987271264195, |
| "std": 0.01414910051971674, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.19.2.to_out.0.weight": { |
| "min": -0.43747568130493164, |
| "max": 0.3737330734729767, |
| "mean": 1.4612624909204897e-05, |
| "std": 0.04412786290049553, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.19.2.to_out.0.bias": { |
| "min": -0.0961233526468277, |
| "max": 0.1762983798980713, |
| "mean": -0.000659514800645411, |
| "std": 0.03514162451028824, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.19.3.g": { |
| "min": 0.42177778482437134, |
| "max": 1.0692633390426636, |
| "mean": 0.7485724687576294, |
| "std": 0.04206255078315735, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.19.4.ff.0.0.weight": { |
| "min": -0.2659589648246765, |
| "max": 0.29692542552948, |
| "mean": -7.890580309322104e-05, |
| "std": 0.040813855826854706, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.19.4.ff.0.0.bias": { |
| "min": -0.18484872579574585, |
| "max": 0.04314016178250313, |
| "mean": -0.03681201860308647, |
| "std": 0.02558443695306778, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.19.4.ff.2.weight": { |
| "min": -0.457691490650177, |
| "max": 0.4868350028991699, |
| "mean": 4.39733594248537e-05, |
| "std": 0.0542210191488266, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.19.4.ff.2.bias": { |
| "min": -0.2863001823425293, |
| "max": 0.5517781972885132, |
| "mean": -0.0008814089233055711, |
| "std": 0.047833118587732315, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.20.0.weight": { |
| "min": -0.29263076186180115, |
| "max": 0.32270461320877075, |
| "mean": 6.018684871378355e-06, |
| "std": 0.019972756505012512, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 2048 |
| ] |
| }, |
| "transformer.layers.20.1.g": { |
| "min": 0.2913359999656677, |
| "max": 0.7601139545440674, |
| "mean": 0.6508511304855347, |
| "std": 0.052110809832811356, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.20.2.to_q.weight": { |
| "min": -0.24366426467895508, |
| "max": 0.26166871190071106, |
| "mean": -5.6619760471221525e-06, |
| "std": 0.039614126086235046, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.20.2.to_q.bias": { |
| "min": -0.26755285263061523, |
| "max": 0.20015348494052887, |
| "mean": -0.0008774641901254654, |
| "std": 0.05177554860711098, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.20.2.to_k.weight": { |
| "min": -0.2722264528274536, |
| "max": 0.2537742853164673, |
| "mean": 5.269570010568714e-06, |
| "std": 0.038710836321115494, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.20.2.to_k.bias": { |
| "min": -12.966026306152344, |
| "max": 15.947823524475098, |
| "mean": 0.0332300066947937, |
| "std": 1.989342451095581, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.20.2.to_v.weight": { |
| "min": -0.20672431588172913, |
| "max": 0.22581705451011658, |
| "mean": -7.253723015310243e-05, |
| "std": 0.04055880755186081, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.20.2.to_v.bias": { |
| "min": -0.06943444162607193, |
| "max": 0.06314389407634735, |
| "mean": 0.00015862843429204077, |
| "std": 0.0147479847073555, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.20.2.to_out.0.weight": { |
| "min": -0.46546468138694763, |
| "max": 0.32013440132141113, |
| "mean": 1.955418883881066e-05, |
| "std": 0.04059435427188873, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.20.2.to_out.0.bias": { |
| "min": -0.06408563256263733, |
| "max": 0.11556272953748703, |
| "mean": 0.0011989418417215347, |
| "std": 0.02470807358622551, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.20.3.g": { |
| "min": 0.3750011920928955, |
| "max": 0.9319288730621338, |
| "mean": 0.7511273622512817, |
| "std": 0.04018896445631981, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.20.4.ff.0.0.weight": { |
| "min": -0.27909016609191895, |
| "max": 0.27321043610572815, |
| "mean": -0.00016836788563523442, |
| "std": 0.04100494086742401, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.20.4.ff.0.0.bias": { |
| "min": -0.19844156503677368, |
| "max": 0.051351871341466904, |
| "mean": -0.032028019428253174, |
| "std": 0.025079041719436646, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.20.4.ff.2.weight": { |
| "min": -0.6585158705711365, |
| "max": 0.5356709957122803, |
| "mean": -5.047450395068154e-05, |
| "std": 0.05285719037055969, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.20.4.ff.2.bias": { |
| "min": -0.1926739513874054, |
| "max": 0.5822402238845825, |
| "mean": -0.0005105392774567008, |
| "std": 0.04108486697077751, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.21.0.weight": { |
| "min": -0.41757693886756897, |
| "max": 0.37195414304733276, |
| "mean": 6.520090209960472e-06, |
| "std": 0.021627968177199364, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 2048 |
| ] |
| }, |
| "transformer.layers.21.1.g": { |
| "min": 0.21454279124736786, |
| "max": 0.746727705001831, |
| "mean": 0.6494921445846558, |
| "std": 0.05432972311973572, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.21.2.to_q.weight": { |
| "min": -0.20945341885089874, |
| "max": 0.19550970196723938, |
| "mean": 4.009851181763224e-05, |
| "std": 0.03945960849523544, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.21.2.to_q.bias": { |
| "min": -0.32960787415504456, |
| "max": 0.25966984033584595, |
| "mean": -0.003232899820432067, |
| "std": 0.056286394596099854, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.21.2.to_k.weight": { |
| "min": -0.20589140057563782, |
| "max": 0.25466933846473694, |
| "mean": 5.40036016900558e-05, |
| "std": 0.03856228291988373, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.21.2.to_k.bias": { |
| "min": -6.243993759155273, |
| "max": 6.932845115661621, |
| "mean": 0.048340775072574615, |
| "std": 1.3851999044418335, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.21.2.to_v.weight": { |
| "min": -0.20977123081684113, |
| "max": 0.23046547174453735, |
| "mean": -4.7887324399198405e-06, |
| "std": 0.041317813098430634, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.21.2.to_v.bias": { |
| "min": -0.043830934911966324, |
| "max": 0.0359884537756443, |
| "mean": -6.7679648054763675e-06, |
| "std": 0.012799433432519436, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.21.2.to_out.0.weight": { |
| "min": -0.3975262939929962, |
| "max": 0.34497249126434326, |
| "mean": -5.5380802223226056e-05, |
| "std": 0.04239468649029732, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.21.2.to_out.0.bias": { |
| "min": -0.055168669670820236, |
| "max": 0.06281793117523193, |
| "mean": 0.0003579839540179819, |
| "std": 0.018675317987799644, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.21.3.g": { |
| "min": 0.35081058740615845, |
| "max": 1.0451138019561768, |
| "mean": 0.7896714210510254, |
| "std": 0.04873151332139969, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.21.4.ff.0.0.weight": { |
| "min": -0.333694726228714, |
| "max": 0.38623932003974915, |
| "mean": -0.00016907340614125133, |
| "std": 0.04149046167731285, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.21.4.ff.0.0.bias": { |
| "min": -0.15751884877681732, |
| "max": 0.05906709283590317, |
| "mean": -0.03182389587163925, |
| "std": 0.0251007080078125, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.21.4.ff.2.weight": { |
| "min": -0.6963667273521423, |
| "max": 0.46923714876174927, |
| "mean": -8.512083149980754e-05, |
| "std": 0.05180640146136284, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.21.4.ff.2.bias": { |
| "min": -0.24786238372325897, |
| "max": 0.3288760185241699, |
| "mean": -0.00026252405950799584, |
| "std": 0.04145393148064613, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.22.0.weight": { |
| "min": -0.28698989748954773, |
| "max": 0.350361168384552, |
| "mean": -2.7725566269509727e-06, |
| "std": 0.02424115315079689, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 2048 |
| ] |
| }, |
| "transformer.layers.22.1.g": { |
| "min": 0.19679424166679382, |
| "max": 0.7790785431861877, |
| "mean": 0.6702431440353394, |
| "std": 0.05866772681474686, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.22.2.to_q.weight": { |
| "min": -0.22908955812454224, |
| "max": 0.23140233755111694, |
| "mean": -2.085999039991293e-05, |
| "std": 0.04043996334075928, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.22.2.to_q.bias": { |
| "min": -0.22004202008247375, |
| "max": 0.24097159504890442, |
| "mean": 0.0007790824165567756, |
| "std": 0.055850621312856674, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.22.2.to_k.weight": { |
| "min": -0.2167646586894989, |
| "max": 0.226406067609787, |
| "mean": -7.223833381431177e-05, |
| "std": 0.039374157786369324, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.22.2.to_k.bias": { |
| "min": -8.906242370605469, |
| "max": 9.069114685058594, |
| "mean": -0.0012542838230729103, |
| "std": 1.8484386205673218, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.22.2.to_v.weight": { |
| "min": -0.26939529180526733, |
| "max": 0.258998304605484, |
| "mean": 4.3638072384055704e-05, |
| "std": 0.0384107306599617, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.22.2.to_v.bias": { |
| "min": -0.0579773373901844, |
| "max": 0.057985395193099976, |
| "mean": 0.0003543748171068728, |
| "std": 0.01471623033285141, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.22.2.to_out.0.weight": { |
| "min": -0.26387640833854675, |
| "max": 0.28812822699546814, |
| "mean": -6.169013795442879e-05, |
| "std": 0.0390775129199028, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.22.2.to_out.0.bias": { |
| "min": -0.04410848394036293, |
| "max": 0.03735562041401863, |
| "mean": -9.80982295004651e-05, |
| "std": 0.013347214087843895, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.22.3.g": { |
| "min": 0.33935481309890747, |
| "max": 1.0925333499908447, |
| "mean": 0.8639740347862244, |
| "std": 0.06387708336114883, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.22.4.ff.0.0.weight": { |
| "min": -0.42313116788864136, |
| "max": 0.41907814145088196, |
| "mean": 0.0003136416198685765, |
| "std": 0.04351295530796051, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.22.4.ff.0.0.bias": { |
| "min": -0.21479536592960358, |
| "max": 0.17072512209415436, |
| "mean": -0.029444200918078423, |
| "std": 0.0318748876452446, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.22.4.ff.2.weight": { |
| "min": -0.5986181497573853, |
| "max": 0.5598904490470886, |
| "mean": -0.00014800383360125124, |
| "std": 0.05346141383051872, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.22.4.ff.2.bias": { |
| "min": -0.17892269790172577, |
| "max": 0.37738052010536194, |
| "mean": 0.0013508039992302656, |
| "std": 0.03731485456228256, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.23.0.weight": { |
| "min": -0.39432692527770996, |
| "max": 0.36881834268569946, |
| "mean": 3.763254062505439e-05, |
| "std": 0.028617430478334427, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 2048 |
| ] |
| }, |
| "transformer.layers.23.1.g": { |
| "min": 0.2906792163848877, |
| "max": 0.8274716138839722, |
| "mean": 0.7055441737174988, |
| "std": 0.06783536076545715, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.23.2.to_q.weight": { |
| "min": -0.9265903830528259, |
| "max": 1.027007818222046, |
| "mean": -2.7936879632761702e-05, |
| "std": 0.04764379560947418, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.23.2.to_q.bias": { |
| "min": -0.8793500661849976, |
| "max": 0.8158687949180603, |
| "mean": -0.0002950741327367723, |
| "std": 0.09555269032716751, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.23.2.to_k.weight": { |
| "min": -0.27022066712379456, |
| "max": 0.24093179404735565, |
| "mean": -2.251441401313059e-05, |
| "std": 0.0389498770236969, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.23.2.to_k.bias": { |
| "min": -23.743555068969727, |
| "max": 22.852014541625977, |
| "mean": -0.09188339114189148, |
| "std": 4.070625305175781, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.23.2.to_v.weight": { |
| "min": -0.22778554260730743, |
| "max": 0.24572508037090302, |
| "mean": -2.547786607465241e-05, |
| "std": 0.03864147141575813, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.23.2.to_v.bias": { |
| "min": -0.06017241254448891, |
| "max": 0.045427631586790085, |
| "mean": -0.00013617021613754332, |
| "std": 0.014690100215375423, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.23.2.to_out.0.weight": { |
| "min": -0.3379840552806854, |
| "max": 0.3750169575214386, |
| "mean": 7.478654879378155e-06, |
| "std": 0.040820397436618805, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.23.2.to_out.0.bias": { |
| "min": -0.04619982838630676, |
| "max": 0.19537773728370667, |
| "mean": 0.0002735886082518846, |
| "std": 0.013551585376262665, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.23.3.g": { |
| "min": 0.37374061346054077, |
| "max": 1.1302894353866577, |
| "mean": 0.8902378082275391, |
| "std": 0.0640074834227562, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.23.4.ff.0.0.weight": { |
| "min": -0.4474950134754181, |
| "max": 0.542551577091217, |
| "mean": 2.5157038180623204e-05, |
| "std": 0.0455806665122509, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.23.4.ff.0.0.bias": { |
| "min": -0.2237873524427414, |
| "max": 0.08737614750862122, |
| "mean": -0.03201454132795334, |
| "std": 0.03774423152208328, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.23.4.ff.2.weight": { |
| "min": -0.7263057827949524, |
| "max": 0.6888318657875061, |
| "mean": 3.633538290159777e-05, |
| "std": 0.0517943874001503, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.23.4.ff.2.bias": { |
| "min": -0.1743825227022171, |
| "max": 0.21823401749134064, |
| "mean": 3.549834946170449e-05, |
| "std": 0.031774841248989105, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.24.0.weight": { |
| "min": -0.3394811451435089, |
| "max": 0.37303876876831055, |
| "mean": 4.305133916204795e-05, |
| "std": 0.034135352820158005, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 2048 |
| ] |
| }, |
| "transformer.layers.24.1.g": { |
| "min": 0.31772536039352417, |
| "max": 1.2872265577316284, |
| "mean": 0.6015347242355347, |
| "std": 0.08348645269870758, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.24.2.to_q.weight": { |
| "min": -0.2831268906593323, |
| "max": 0.26034945249557495, |
| "mean": -3.016911477971007e-06, |
| "std": 0.03598069027066231, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.24.2.to_q.bias": { |
| "min": -0.23578572273254395, |
| "max": 0.20580488443374634, |
| "mean": 0.00023967580636963248, |
| "std": 0.056039854884147644, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.24.2.to_k.weight": { |
| "min": -0.43542858958244324, |
| "max": 0.32475200295448303, |
| "mean": 2.4229491828009486e-05, |
| "std": 0.034124139696359634, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.24.2.to_k.bias": { |
| "min": -5.546597957611084, |
| "max": 7.314022064208984, |
| "mean": -0.007369913160800934, |
| "std": 0.6993920803070068, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.24.2.to_v.weight": { |
| "min": -0.34419700503349304, |
| "max": 0.36281776428222656, |
| "mean": 0.00010317970009054989, |
| "std": 0.04783639311790466, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.24.2.to_v.bias": { |
| "min": -0.07377609610557556, |
| "max": 0.06036657840013504, |
| "mean": 0.0009365753503516316, |
| "std": 0.014937076717615128, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.24.2.to_out.0.weight": { |
| "min": -0.2563660442829132, |
| "max": 0.28687092661857605, |
| "mean": 4.898875886283349e-06, |
| "std": 0.04156457632780075, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.24.2.to_out.0.bias": { |
| "min": -0.055319979786872864, |
| "max": 0.06281081587076187, |
| "mean": 0.000127265666378662, |
| "std": 0.007150812540203333, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.24.3.g": { |
| "min": 0.4940038025379181, |
| "max": 1.220664620399475, |
| "mean": 1.0135600566864014, |
| "std": 0.11748378723859787, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.24.4.ff.0.0.weight": { |
| "min": -1.0940163135528564, |
| "max": 1.0475441217422485, |
| "mean": -4.872599311056547e-05, |
| "std": 0.05241787061095238, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.24.4.ff.0.0.bias": { |
| "min": -0.2236405611038208, |
| "max": 0.1730623096227646, |
| "mean": -0.027228206396102905, |
| "std": 0.0363101065158844, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.24.4.ff.2.weight": { |
| "min": -0.8842402696609497, |
| "max": 0.9227275252342224, |
| "mean": -0.00014601278235204518, |
| "std": 0.05329864099621773, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.24.4.ff.2.bias": { |
| "min": -0.1710553914308548, |
| "max": 0.3796318471431732, |
| "mean": 0.0033668535761535168, |
| "std": 0.03987643122673035, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.25.0.weight": { |
| "min": -0.7777752876281738, |
| "max": 0.722641110420227, |
| "mean": 1.80296028702287e-05, |
| "std": 0.0461542084813118, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 2048 |
| ] |
| }, |
| "transformer.layers.25.1.g": { |
| "min": 0.3386844992637634, |
| "max": 1.4281909465789795, |
| "mean": 0.9485001564025879, |
| "std": 0.20679982006549835, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.25.2.to_q.weight": { |
| "min": -1.745824933052063, |
| "max": 1.7045180797576904, |
| "mean": 0.0002270373224746436, |
| "std": 0.15870553255081177, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.25.2.to_q.bias": { |
| "min": -1.1994949579238892, |
| "max": 1.1009647846221924, |
| "mean": -0.009547820314764977, |
| "std": 0.20390011370182037, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.25.2.to_k.weight": { |
| "min": -0.4209446907043457, |
| "max": 0.42817720770835876, |
| "mean": 6.392307841451839e-05, |
| "std": 0.04802021011710167, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.25.2.to_k.bias": { |
| "min": -19.74793243408203, |
| "max": 19.543048858642578, |
| "mean": -0.2483428716659546, |
| "std": 4.7770676612854, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.25.2.to_v.weight": { |
| "min": -0.32391926646232605, |
| "max": 0.438634991645813, |
| "mean": -1.1790625649155118e-05, |
| "std": 0.04616706818342209, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.25.2.to_v.bias": { |
| "min": -0.03377115726470947, |
| "max": 0.03684735298156738, |
| "mean": 0.0006395116215571761, |
| "std": 0.012911375612020493, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.25.2.to_out.0.weight": { |
| "min": -0.7035614252090454, |
| "max": 0.6690102815628052, |
| "mean": 4.2652536649256945e-05, |
| "std": 0.0578920915722847, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.25.2.to_out.0.bias": { |
| "min": -0.07234025001525879, |
| "max": 0.06776763498783112, |
| "mean": -0.00013464699441101402, |
| "std": 0.012891847640275955, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.25.3.g": { |
| "min": 0.38047194480895996, |
| "max": 1.39299738407135, |
| "mean": 1.06674325466156, |
| "std": 0.2197609543800354, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.25.4.ff.0.0.weight": { |
| "min": -0.6165490746498108, |
| "max": 0.7185496091842651, |
| "mean": 0.00011303066276013851, |
| "std": 0.05802777782082558, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.25.4.ff.0.0.bias": { |
| "min": -0.21881279349327087, |
| "max": 0.22498759627342224, |
| "mean": 0.00618295231834054, |
| "std": 0.04969846084713936, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.25.4.ff.2.weight": { |
| "min": -0.629830002784729, |
| "max": 0.8896750807762146, |
| "mean": 1.2404842891555745e-05, |
| "std": 0.023545295000076294, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.25.4.ff.2.bias": { |
| "min": -0.5068444013595581, |
| "max": 0.47373077273368835, |
| "mean": -0.0030198940075933933, |
| "std": 0.06924331188201904, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.norm_out.g": { |
| "min": 0.5382840037345886, |
| "max": 1.1801176071166992, |
| "mean": 0.7828130722045898, |
| "std": 0.09876110404729843, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.proj_out.weight": { |
| "min": -0.267057865858078, |
| "max": 0.212993323802948, |
| "mean": -0.0002232328843092546, |
| "std": 0.054005783051252365, |
| "sparsity": 0.0, |
| "shape": [ |
| 100, |
| 1024 |
| ] |
| }, |
| "transformer.proj_out.bias": { |
| "min": -0.23836649954319, |
| "max": 0.014864332042634487, |
| "mean": -0.043917927891016006, |
| "std": 0.03428623452782631, |
| "sparsity": 0.0, |
| "shape": [ |
| 100 |
| ] |
| } |
| } |
| } |