| { |
| "layer_types": { |
| "transformer": 391 |
| }, |
| "parameter_counts": { |
| "transformer.time_embed.time_mlp.0.weight": 262144, |
| "transformer.time_embed.time_mlp.0.bias": 1024, |
| "transformer.time_embed.time_mlp.2.weight": 1048576, |
| "transformer.time_embed.time_mlp.2.bias": 1024, |
| "transformer.text_embed.text_embed.weight": 254600, |
| "transformer.input_embed.proj.weight": 307200, |
| "transformer.input_embed.proj.bias": 1024, |
| "transformer.input_embed.conv_pos_embed.conv1d.0.weight": 2031616, |
| "transformer.input_embed.conv_pos_embed.conv1d.0.bias": 1024, |
| "transformer.input_embed.conv_pos_embed.conv1d.2.weight": 2031616, |
| "transformer.input_embed.conv_pos_embed.conv1d.2.bias": 1024, |
| "transformer.layers.0.1.g": 1024, |
| "transformer.layers.0.2.to_q.weight": 1048576, |
| "transformer.layers.0.2.to_q.bias": 1024, |
| "transformer.layers.0.2.to_k.weight": 1048576, |
| "transformer.layers.0.2.to_k.bias": 1024, |
| "transformer.layers.0.2.to_v.weight": 1048576, |
| "transformer.layers.0.2.to_v.bias": 1024, |
| "transformer.layers.0.2.to_out.0.weight": 1048576, |
| "transformer.layers.0.2.to_out.0.bias": 1024, |
| "transformer.layers.0.3.g": 1024, |
| "transformer.layers.0.4.ff.0.0.weight": 4194304, |
| "transformer.layers.0.4.ff.0.0.bias": 4096, |
| "transformer.layers.0.4.ff.2.weight": 4194304, |
| "transformer.layers.0.4.ff.2.bias": 1024, |
| "transformer.layers.1.1.g": 1024, |
| "transformer.layers.1.2.to_q.weight": 1048576, |
| "transformer.layers.1.2.to_q.bias": 1024, |
| "transformer.layers.1.2.to_k.weight": 1048576, |
| "transformer.layers.1.2.to_k.bias": 1024, |
| "transformer.layers.1.2.to_v.weight": 1048576, |
| "transformer.layers.1.2.to_v.bias": 1024, |
| "transformer.layers.1.2.to_out.0.weight": 1048576, |
| "transformer.layers.1.2.to_out.0.bias": 1024, |
| "transformer.layers.1.3.g": 1024, |
| "transformer.layers.1.4.ff.0.0.weight": 4194304, |
| "transformer.layers.1.4.ff.0.0.bias": 4096, |
| "transformer.layers.1.4.ff.2.weight": 4194304, |
| "transformer.layers.1.4.ff.2.bias": 1024, |
| "transformer.layers.2.1.g": 1024, |
| "transformer.layers.2.2.to_q.weight": 1048576, |
| "transformer.layers.2.2.to_q.bias": 1024, |
| "transformer.layers.2.2.to_k.weight": 1048576, |
| "transformer.layers.2.2.to_k.bias": 1024, |
| "transformer.layers.2.2.to_v.weight": 1048576, |
| "transformer.layers.2.2.to_v.bias": 1024, |
| "transformer.layers.2.2.to_out.0.weight": 1048576, |
| "transformer.layers.2.2.to_out.0.bias": 1024, |
| "transformer.layers.2.3.g": 1024, |
| "transformer.layers.2.4.ff.0.0.weight": 4194304, |
| "transformer.layers.2.4.ff.0.0.bias": 4096, |
| "transformer.layers.2.4.ff.2.weight": 4194304, |
| "transformer.layers.2.4.ff.2.bias": 1024, |
| "transformer.layers.3.1.g": 1024, |
| "transformer.layers.3.2.to_q.weight": 1048576, |
| "transformer.layers.3.2.to_q.bias": 1024, |
| "transformer.layers.3.2.to_k.weight": 1048576, |
| "transformer.layers.3.2.to_k.bias": 1024, |
| "transformer.layers.3.2.to_v.weight": 1048576, |
| "transformer.layers.3.2.to_v.bias": 1024, |
| "transformer.layers.3.2.to_out.0.weight": 1048576, |
| "transformer.layers.3.2.to_out.0.bias": 1024, |
| "transformer.layers.3.3.g": 1024, |
| "transformer.layers.3.4.ff.0.0.weight": 4194304, |
| "transformer.layers.3.4.ff.0.0.bias": 4096, |
| "transformer.layers.3.4.ff.2.weight": 4194304, |
| "transformer.layers.3.4.ff.2.bias": 1024, |
| "transformer.layers.4.1.g": 1024, |
| "transformer.layers.4.2.to_q.weight": 1048576, |
| "transformer.layers.4.2.to_q.bias": 1024, |
| "transformer.layers.4.2.to_k.weight": 1048576, |
| "transformer.layers.4.2.to_k.bias": 1024, |
| "transformer.layers.4.2.to_v.weight": 1048576, |
| "transformer.layers.4.2.to_v.bias": 1024, |
| "transformer.layers.4.2.to_out.0.weight": 1048576, |
| "transformer.layers.4.2.to_out.0.bias": 1024, |
| "transformer.layers.4.3.g": 1024, |
| "transformer.layers.4.4.ff.0.0.weight": 4194304, |
| "transformer.layers.4.4.ff.0.0.bias": 4096, |
| "transformer.layers.4.4.ff.2.weight": 4194304, |
| "transformer.layers.4.4.ff.2.bias": 1024, |
| "transformer.layers.5.1.g": 1024, |
| "transformer.layers.5.2.to_q.weight": 1048576, |
| "transformer.layers.5.2.to_q.bias": 1024, |
| "transformer.layers.5.2.to_k.weight": 1048576, |
| "transformer.layers.5.2.to_k.bias": 1024, |
| "transformer.layers.5.2.to_v.weight": 1048576, |
| "transformer.layers.5.2.to_v.bias": 1024, |
| "transformer.layers.5.2.to_out.0.weight": 1048576, |
| "transformer.layers.5.2.to_out.0.bias": 1024, |
| "transformer.layers.5.3.g": 1024, |
| "transformer.layers.5.4.ff.0.0.weight": 4194304, |
| "transformer.layers.5.4.ff.0.0.bias": 4096, |
| "transformer.layers.5.4.ff.2.weight": 4194304, |
| "transformer.layers.5.4.ff.2.bias": 1024, |
| "transformer.layers.6.1.g": 1024, |
| "transformer.layers.6.2.to_q.weight": 1048576, |
| "transformer.layers.6.2.to_q.bias": 1024, |
| "transformer.layers.6.2.to_k.weight": 1048576, |
| "transformer.layers.6.2.to_k.bias": 1024, |
| "transformer.layers.6.2.to_v.weight": 1048576, |
| "transformer.layers.6.2.to_v.bias": 1024, |
| "transformer.layers.6.2.to_out.0.weight": 1048576, |
| "transformer.layers.6.2.to_out.0.bias": 1024, |
| "transformer.layers.6.3.g": 1024, |
| "transformer.layers.6.4.ff.0.0.weight": 4194304, |
| "transformer.layers.6.4.ff.0.0.bias": 4096, |
| "transformer.layers.6.4.ff.2.weight": 4194304, |
| "transformer.layers.6.4.ff.2.bias": 1024, |
| "transformer.layers.7.1.g": 1024, |
| "transformer.layers.7.2.to_q.weight": 1048576, |
| "transformer.layers.7.2.to_q.bias": 1024, |
| "transformer.layers.7.2.to_k.weight": 1048576, |
| "transformer.layers.7.2.to_k.bias": 1024, |
| "transformer.layers.7.2.to_v.weight": 1048576, |
| "transformer.layers.7.2.to_v.bias": 1024, |
| "transformer.layers.7.2.to_out.0.weight": 1048576, |
| "transformer.layers.7.2.to_out.0.bias": 1024, |
| "transformer.layers.7.3.g": 1024, |
| "transformer.layers.7.4.ff.0.0.weight": 4194304, |
| "transformer.layers.7.4.ff.0.0.bias": 4096, |
| "transformer.layers.7.4.ff.2.weight": 4194304, |
| "transformer.layers.7.4.ff.2.bias": 1024, |
| "transformer.layers.8.1.g": 1024, |
| "transformer.layers.8.2.to_q.weight": 1048576, |
| "transformer.layers.8.2.to_q.bias": 1024, |
| "transformer.layers.8.2.to_k.weight": 1048576, |
| "transformer.layers.8.2.to_k.bias": 1024, |
| "transformer.layers.8.2.to_v.weight": 1048576, |
| "transformer.layers.8.2.to_v.bias": 1024, |
| "transformer.layers.8.2.to_out.0.weight": 1048576, |
| "transformer.layers.8.2.to_out.0.bias": 1024, |
| "transformer.layers.8.3.g": 1024, |
| "transformer.layers.8.4.ff.0.0.weight": 4194304, |
| "transformer.layers.8.4.ff.0.0.bias": 4096, |
| "transformer.layers.8.4.ff.2.weight": 4194304, |
| "transformer.layers.8.4.ff.2.bias": 1024, |
| "transformer.layers.9.1.g": 1024, |
| "transformer.layers.9.2.to_q.weight": 1048576, |
| "transformer.layers.9.2.to_q.bias": 1024, |
| "transformer.layers.9.2.to_k.weight": 1048576, |
| "transformer.layers.9.2.to_k.bias": 1024, |
| "transformer.layers.9.2.to_v.weight": 1048576, |
| "transformer.layers.9.2.to_v.bias": 1024, |
| "transformer.layers.9.2.to_out.0.weight": 1048576, |
| "transformer.layers.9.2.to_out.0.bias": 1024, |
| "transformer.layers.9.3.g": 1024, |
| "transformer.layers.9.4.ff.0.0.weight": 4194304, |
| "transformer.layers.9.4.ff.0.0.bias": 4096, |
| "transformer.layers.9.4.ff.2.weight": 4194304, |
| "transformer.layers.9.4.ff.2.bias": 1024, |
| "transformer.layers.10.1.g": 1024, |
| "transformer.layers.10.2.to_q.weight": 1048576, |
| "transformer.layers.10.2.to_q.bias": 1024, |
| "transformer.layers.10.2.to_k.weight": 1048576, |
| "transformer.layers.10.2.to_k.bias": 1024, |
| "transformer.layers.10.2.to_v.weight": 1048576, |
| "transformer.layers.10.2.to_v.bias": 1024, |
| "transformer.layers.10.2.to_out.0.weight": 1048576, |
| "transformer.layers.10.2.to_out.0.bias": 1024, |
| "transformer.layers.10.3.g": 1024, |
| "transformer.layers.10.4.ff.0.0.weight": 4194304, |
| "transformer.layers.10.4.ff.0.0.bias": 4096, |
| "transformer.layers.10.4.ff.2.weight": 4194304, |
| "transformer.layers.10.4.ff.2.bias": 1024, |
| "transformer.layers.11.1.g": 1024, |
| "transformer.layers.11.2.to_q.weight": 1048576, |
| "transformer.layers.11.2.to_q.bias": 1024, |
| "transformer.layers.11.2.to_k.weight": 1048576, |
| "transformer.layers.11.2.to_k.bias": 1024, |
| "transformer.layers.11.2.to_v.weight": 1048576, |
| "transformer.layers.11.2.to_v.bias": 1024, |
| "transformer.layers.11.2.to_out.0.weight": 1048576, |
| "transformer.layers.11.2.to_out.0.bias": 1024, |
| "transformer.layers.11.3.g": 1024, |
| "transformer.layers.11.4.ff.0.0.weight": 4194304, |
| "transformer.layers.11.4.ff.0.0.bias": 4096, |
| "transformer.layers.11.4.ff.2.weight": 4194304, |
| "transformer.layers.11.4.ff.2.bias": 1024, |
| "transformer.layers.12.1.g": 1024, |
| "transformer.layers.12.2.to_q.weight": 1048576, |
| "transformer.layers.12.2.to_q.bias": 1024, |
| "transformer.layers.12.2.to_k.weight": 1048576, |
| "transformer.layers.12.2.to_k.bias": 1024, |
| "transformer.layers.12.2.to_v.weight": 1048576, |
| "transformer.layers.12.2.to_v.bias": 1024, |
| "transformer.layers.12.2.to_out.0.weight": 1048576, |
| "transformer.layers.12.2.to_out.0.bias": 1024, |
| "transformer.layers.12.3.g": 1024, |
| "transformer.layers.12.4.ff.0.0.weight": 4194304, |
| "transformer.layers.12.4.ff.0.0.bias": 4096, |
| "transformer.layers.12.4.ff.2.weight": 4194304, |
| "transformer.layers.12.4.ff.2.bias": 1024, |
| "transformer.layers.13.0.weight": 2097152, |
| "transformer.layers.13.1.g": 1024, |
| "transformer.layers.13.2.to_q.weight": 1048576, |
| "transformer.layers.13.2.to_q.bias": 1024, |
| "transformer.layers.13.2.to_k.weight": 1048576, |
| "transformer.layers.13.2.to_k.bias": 1024, |
| "transformer.layers.13.2.to_v.weight": 1048576, |
| "transformer.layers.13.2.to_v.bias": 1024, |
| "transformer.layers.13.2.to_out.0.weight": 1048576, |
| "transformer.layers.13.2.to_out.0.bias": 1024, |
| "transformer.layers.13.3.g": 1024, |
| "transformer.layers.13.4.ff.0.0.weight": 4194304, |
| "transformer.layers.13.4.ff.0.0.bias": 4096, |
| "transformer.layers.13.4.ff.2.weight": 4194304, |
| "transformer.layers.13.4.ff.2.bias": 1024, |
| "transformer.layers.14.0.weight": 2097152, |
| "transformer.layers.14.1.g": 1024, |
| "transformer.layers.14.2.to_q.weight": 1048576, |
| "transformer.layers.14.2.to_q.bias": 1024, |
| "transformer.layers.14.2.to_k.weight": 1048576, |
| "transformer.layers.14.2.to_k.bias": 1024, |
| "transformer.layers.14.2.to_v.weight": 1048576, |
| "transformer.layers.14.2.to_v.bias": 1024, |
| "transformer.layers.14.2.to_out.0.weight": 1048576, |
| "transformer.layers.14.2.to_out.0.bias": 1024, |
| "transformer.layers.14.3.g": 1024, |
| "transformer.layers.14.4.ff.0.0.weight": 4194304, |
| "transformer.layers.14.4.ff.0.0.bias": 4096, |
| "transformer.layers.14.4.ff.2.weight": 4194304, |
| "transformer.layers.14.4.ff.2.bias": 1024, |
| "transformer.layers.15.0.weight": 2097152, |
| "transformer.layers.15.1.g": 1024, |
| "transformer.layers.15.2.to_q.weight": 1048576, |
| "transformer.layers.15.2.to_q.bias": 1024, |
| "transformer.layers.15.2.to_k.weight": 1048576, |
| "transformer.layers.15.2.to_k.bias": 1024, |
| "transformer.layers.15.2.to_v.weight": 1048576, |
| "transformer.layers.15.2.to_v.bias": 1024, |
| "transformer.layers.15.2.to_out.0.weight": 1048576, |
| "transformer.layers.15.2.to_out.0.bias": 1024, |
| "transformer.layers.15.3.g": 1024, |
| "transformer.layers.15.4.ff.0.0.weight": 4194304, |
| "transformer.layers.15.4.ff.0.0.bias": 4096, |
| "transformer.layers.15.4.ff.2.weight": 4194304, |
| "transformer.layers.15.4.ff.2.bias": 1024, |
| "transformer.layers.16.0.weight": 2097152, |
| "transformer.layers.16.1.g": 1024, |
| "transformer.layers.16.2.to_q.weight": 1048576, |
| "transformer.layers.16.2.to_q.bias": 1024, |
| "transformer.layers.16.2.to_k.weight": 1048576, |
| "transformer.layers.16.2.to_k.bias": 1024, |
| "transformer.layers.16.2.to_v.weight": 1048576, |
| "transformer.layers.16.2.to_v.bias": 1024, |
| "transformer.layers.16.2.to_out.0.weight": 1048576, |
| "transformer.layers.16.2.to_out.0.bias": 1024, |
| "transformer.layers.16.3.g": 1024, |
| "transformer.layers.16.4.ff.0.0.weight": 4194304, |
| "transformer.layers.16.4.ff.0.0.bias": 4096, |
| "transformer.layers.16.4.ff.2.weight": 4194304, |
| "transformer.layers.16.4.ff.2.bias": 1024, |
| "transformer.layers.17.0.weight": 2097152, |
| "transformer.layers.17.1.g": 1024, |
| "transformer.layers.17.2.to_q.weight": 1048576, |
| "transformer.layers.17.2.to_q.bias": 1024, |
| "transformer.layers.17.2.to_k.weight": 1048576, |
| "transformer.layers.17.2.to_k.bias": 1024, |
| "transformer.layers.17.2.to_v.weight": 1048576, |
| "transformer.layers.17.2.to_v.bias": 1024, |
| "transformer.layers.17.2.to_out.0.weight": 1048576, |
| "transformer.layers.17.2.to_out.0.bias": 1024, |
| "transformer.layers.17.3.g": 1024, |
| "transformer.layers.17.4.ff.0.0.weight": 4194304, |
| "transformer.layers.17.4.ff.0.0.bias": 4096, |
| "transformer.layers.17.4.ff.2.weight": 4194304, |
| "transformer.layers.17.4.ff.2.bias": 1024, |
| "transformer.layers.18.0.weight": 2097152, |
| "transformer.layers.18.1.g": 1024, |
| "transformer.layers.18.2.to_q.weight": 1048576, |
| "transformer.layers.18.2.to_q.bias": 1024, |
| "transformer.layers.18.2.to_k.weight": 1048576, |
| "transformer.layers.18.2.to_k.bias": 1024, |
| "transformer.layers.18.2.to_v.weight": 1048576, |
| "transformer.layers.18.2.to_v.bias": 1024, |
| "transformer.layers.18.2.to_out.0.weight": 1048576, |
| "transformer.layers.18.2.to_out.0.bias": 1024, |
| "transformer.layers.18.3.g": 1024, |
| "transformer.layers.18.4.ff.0.0.weight": 4194304, |
| "transformer.layers.18.4.ff.0.0.bias": 4096, |
| "transformer.layers.18.4.ff.2.weight": 4194304, |
| "transformer.layers.18.4.ff.2.bias": 1024, |
| "transformer.layers.19.0.weight": 2097152, |
| "transformer.layers.19.1.g": 1024, |
| "transformer.layers.19.2.to_q.weight": 1048576, |
| "transformer.layers.19.2.to_q.bias": 1024, |
| "transformer.layers.19.2.to_k.weight": 1048576, |
| "transformer.layers.19.2.to_k.bias": 1024, |
| "transformer.layers.19.2.to_v.weight": 1048576, |
| "transformer.layers.19.2.to_v.bias": 1024, |
| "transformer.layers.19.2.to_out.0.weight": 1048576, |
| "transformer.layers.19.2.to_out.0.bias": 1024, |
| "transformer.layers.19.3.g": 1024, |
| "transformer.layers.19.4.ff.0.0.weight": 4194304, |
| "transformer.layers.19.4.ff.0.0.bias": 4096, |
| "transformer.layers.19.4.ff.2.weight": 4194304, |
| "transformer.layers.19.4.ff.2.bias": 1024, |
| "transformer.layers.20.0.weight": 2097152, |
| "transformer.layers.20.1.g": 1024, |
| "transformer.layers.20.2.to_q.weight": 1048576, |
| "transformer.layers.20.2.to_q.bias": 1024, |
| "transformer.layers.20.2.to_k.weight": 1048576, |
| "transformer.layers.20.2.to_k.bias": 1024, |
| "transformer.layers.20.2.to_v.weight": 1048576, |
| "transformer.layers.20.2.to_v.bias": 1024, |
| "transformer.layers.20.2.to_out.0.weight": 1048576, |
| "transformer.layers.20.2.to_out.0.bias": 1024, |
| "transformer.layers.20.3.g": 1024, |
| "transformer.layers.20.4.ff.0.0.weight": 4194304, |
| "transformer.layers.20.4.ff.0.0.bias": 4096, |
| "transformer.layers.20.4.ff.2.weight": 4194304, |
| "transformer.layers.20.4.ff.2.bias": 1024, |
| "transformer.layers.21.0.weight": 2097152, |
| "transformer.layers.21.1.g": 1024, |
| "transformer.layers.21.2.to_q.weight": 1048576, |
| "transformer.layers.21.2.to_q.bias": 1024, |
| "transformer.layers.21.2.to_k.weight": 1048576, |
| "transformer.layers.21.2.to_k.bias": 1024, |
| "transformer.layers.21.2.to_v.weight": 1048576, |
| "transformer.layers.21.2.to_v.bias": 1024, |
| "transformer.layers.21.2.to_out.0.weight": 1048576, |
| "transformer.layers.21.2.to_out.0.bias": 1024, |
| "transformer.layers.21.3.g": 1024, |
| "transformer.layers.21.4.ff.0.0.weight": 4194304, |
| "transformer.layers.21.4.ff.0.0.bias": 4096, |
| "transformer.layers.21.4.ff.2.weight": 4194304, |
| "transformer.layers.21.4.ff.2.bias": 1024, |
| "transformer.layers.22.0.weight": 2097152, |
| "transformer.layers.22.1.g": 1024, |
| "transformer.layers.22.2.to_q.weight": 1048576, |
| "transformer.layers.22.2.to_q.bias": 1024, |
| "transformer.layers.22.2.to_k.weight": 1048576, |
| "transformer.layers.22.2.to_k.bias": 1024, |
| "transformer.layers.22.2.to_v.weight": 1048576, |
| "transformer.layers.22.2.to_v.bias": 1024, |
| "transformer.layers.22.2.to_out.0.weight": 1048576, |
| "transformer.layers.22.2.to_out.0.bias": 1024, |
| "transformer.layers.22.3.g": 1024, |
| "transformer.layers.22.4.ff.0.0.weight": 4194304, |
| "transformer.layers.22.4.ff.0.0.bias": 4096, |
| "transformer.layers.22.4.ff.2.weight": 4194304, |
| "transformer.layers.22.4.ff.2.bias": 1024, |
| "transformer.layers.23.0.weight": 2097152, |
| "transformer.layers.23.1.g": 1024, |
| "transformer.layers.23.2.to_q.weight": 1048576, |
| "transformer.layers.23.2.to_q.bias": 1024, |
| "transformer.layers.23.2.to_k.weight": 1048576, |
| "transformer.layers.23.2.to_k.bias": 1024, |
| "transformer.layers.23.2.to_v.weight": 1048576, |
| "transformer.layers.23.2.to_v.bias": 1024, |
| "transformer.layers.23.2.to_out.0.weight": 1048576, |
| "transformer.layers.23.2.to_out.0.bias": 1024, |
| "transformer.layers.23.3.g": 1024, |
| "transformer.layers.23.4.ff.0.0.weight": 4194304, |
| "transformer.layers.23.4.ff.0.0.bias": 4096, |
| "transformer.layers.23.4.ff.2.weight": 4194304, |
| "transformer.layers.23.4.ff.2.bias": 1024, |
| "transformer.layers.24.0.weight": 2097152, |
| "transformer.layers.24.1.g": 1024, |
| "transformer.layers.24.2.to_q.weight": 1048576, |
| "transformer.layers.24.2.to_q.bias": 1024, |
| "transformer.layers.24.2.to_k.weight": 1048576, |
| "transformer.layers.24.2.to_k.bias": 1024, |
| "transformer.layers.24.2.to_v.weight": 1048576, |
| "transformer.layers.24.2.to_v.bias": 1024, |
| "transformer.layers.24.2.to_out.0.weight": 1048576, |
| "transformer.layers.24.2.to_out.0.bias": 1024, |
| "transformer.layers.24.3.g": 1024, |
| "transformer.layers.24.4.ff.0.0.weight": 4194304, |
| "transformer.layers.24.4.ff.0.0.bias": 4096, |
| "transformer.layers.24.4.ff.2.weight": 4194304, |
| "transformer.layers.24.4.ff.2.bias": 1024, |
| "transformer.layers.25.0.weight": 2097152, |
| "transformer.layers.25.1.g": 1024, |
| "transformer.layers.25.2.to_q.weight": 1048576, |
| "transformer.layers.25.2.to_q.bias": 1024, |
| "transformer.layers.25.2.to_k.weight": 1048576, |
| "transformer.layers.25.2.to_k.bias": 1024, |
| "transformer.layers.25.2.to_v.weight": 1048576, |
| "transformer.layers.25.2.to_v.bias": 1024, |
| "transformer.layers.25.2.to_out.0.weight": 1048576, |
| "transformer.layers.25.2.to_out.0.bias": 1024, |
| "transformer.layers.25.3.g": 1024, |
| "transformer.layers.25.4.ff.0.0.weight": 4194304, |
| "transformer.layers.25.4.ff.0.0.bias": 4096, |
| "transformer.layers.25.4.ff.2.weight": 4194304, |
| "transformer.layers.25.4.ff.2.bias": 1024, |
| "transformer.norm_out.g": 1024, |
| "transformer.proj_out.weight": 102400, |
| "transformer.proj_out.bias": 100 |
| }, |
| "important_layers": [ |
| "transformer.time_embed.time_mlp.0.weight", |
| "transformer.time_embed.time_mlp.2.weight", |
| "transformer.text_embed.text_embed.weight", |
| "transformer.input_embed.proj.weight", |
| "transformer.input_embed.conv_pos_embed.conv1d.0.weight", |
| "transformer.input_embed.conv_pos_embed.conv1d.2.weight", |
| "transformer.layers.0.2.to_q.weight", |
| "transformer.layers.0.2.to_k.weight", |
| "transformer.layers.0.2.to_v.weight", |
| "transformer.layers.0.2.to_out.0.weight", |
| "transformer.layers.0.4.ff.0.0.weight", |
| "transformer.layers.0.4.ff.2.weight", |
| "transformer.layers.1.2.to_q.weight", |
| "transformer.layers.1.2.to_k.weight", |
| "transformer.layers.1.2.to_v.weight", |
| "transformer.layers.1.2.to_out.0.weight", |
| "transformer.layers.1.4.ff.0.0.weight", |
| "transformer.layers.1.4.ff.2.weight", |
| "transformer.layers.2.2.to_q.weight", |
| "transformer.layers.2.2.to_k.weight", |
| "transformer.layers.2.2.to_v.weight", |
| "transformer.layers.2.2.to_out.0.weight", |
| "transformer.layers.2.4.ff.0.0.weight", |
| "transformer.layers.2.4.ff.2.weight", |
| "transformer.layers.3.2.to_q.weight", |
| "transformer.layers.3.2.to_k.weight", |
| "transformer.layers.3.2.to_v.weight", |
| "transformer.layers.3.2.to_out.0.weight", |
| "transformer.layers.3.4.ff.0.0.weight", |
| "transformer.layers.3.4.ff.2.weight", |
| "transformer.layers.4.2.to_q.weight", |
| "transformer.layers.4.2.to_k.weight", |
| "transformer.layers.4.2.to_v.weight", |
| "transformer.layers.4.2.to_out.0.weight", |
| "transformer.layers.4.4.ff.0.0.weight", |
| "transformer.layers.4.4.ff.2.weight", |
| "transformer.layers.5.2.to_q.weight", |
| "transformer.layers.5.2.to_k.weight", |
| "transformer.layers.5.2.to_v.weight", |
| "transformer.layers.5.2.to_out.0.weight", |
| "transformer.layers.5.4.ff.0.0.weight", |
| "transformer.layers.5.4.ff.2.weight", |
| "transformer.layers.6.2.to_q.weight", |
| "transformer.layers.6.2.to_k.weight", |
| "transformer.layers.6.2.to_v.weight", |
| "transformer.layers.6.2.to_out.0.weight", |
| "transformer.layers.6.4.ff.0.0.weight", |
| "transformer.layers.6.4.ff.2.weight", |
| "transformer.layers.7.2.to_q.weight", |
| "transformer.layers.7.2.to_k.weight", |
| "transformer.layers.7.2.to_v.weight", |
| "transformer.layers.7.2.to_out.0.weight", |
| "transformer.layers.7.4.ff.0.0.weight", |
| "transformer.layers.7.4.ff.2.weight", |
| "transformer.layers.8.4.ff.0.0.weight", |
| "transformer.layers.8.4.ff.2.weight", |
| "transformer.layers.9.4.ff.0.0.weight", |
| "transformer.layers.9.4.ff.2.weight", |
| "transformer.layers.10.4.ff.0.0.weight", |
| "transformer.layers.10.4.ff.2.weight", |
| "transformer.layers.11.4.ff.0.0.weight", |
| "transformer.layers.11.4.ff.2.weight", |
| "transformer.layers.12.4.ff.0.0.weight", |
| "transformer.layers.12.4.ff.2.weight", |
| "transformer.layers.13.0.weight", |
| "transformer.layers.13.4.ff.0.0.weight", |
| "transformer.layers.13.4.ff.2.weight", |
| "transformer.layers.14.0.weight", |
| "transformer.layers.14.4.ff.0.0.weight", |
| "transformer.layers.14.4.ff.2.weight", |
| "transformer.layers.15.0.weight", |
| "transformer.layers.15.4.ff.0.0.weight", |
| "transformer.layers.15.4.ff.2.weight", |
| "transformer.layers.16.4.ff.0.0.weight", |
| "transformer.layers.16.4.ff.2.weight", |
| "transformer.layers.17.4.ff.0.0.weight", |
| "transformer.layers.17.4.ff.2.weight", |
| "transformer.layers.18.4.ff.0.0.weight", |
| "transformer.layers.18.4.ff.2.weight", |
| "transformer.layers.19.4.ff.0.0.weight", |
| "transformer.layers.19.4.ff.2.weight", |
| "transformer.layers.20.4.ff.0.0.weight", |
| "transformer.layers.20.4.ff.2.weight", |
| "transformer.layers.21.4.ff.0.0.weight", |
| "transformer.layers.21.4.ff.2.weight", |
| "transformer.layers.22.4.ff.0.0.weight", |
| "transformer.layers.22.4.ff.2.weight", |
| "transformer.layers.23.4.ff.0.0.weight", |
| "transformer.layers.23.4.ff.2.weight", |
| "transformer.layers.24.4.ff.0.0.weight", |
| "transformer.layers.24.4.ff.2.weight", |
| "transformer.layers.25.4.ff.0.0.weight", |
| "transformer.layers.25.4.ff.2.weight" |
| ], |
| "bottleneck_layers": [], |
| "recommendations": { |
| "focus_layers": [ |
| "transformer.time_embed.time_mlp.0.weight", |
| "transformer.time_embed.time_mlp.2.weight", |
| "transformer.text_embed.text_embed.weight", |
| "transformer.input_embed.proj.weight", |
| "transformer.input_embed.conv_pos_embed.conv1d.0.weight", |
| "transformer.input_embed.conv_pos_embed.conv1d.2.weight", |
| "transformer.layers.0.2.to_q.weight", |
| "transformer.layers.0.2.to_k.weight", |
| "transformer.layers.0.2.to_v.weight", |
| "transformer.layers.0.2.to_out.0.weight", |
| "transformer.layers.0.4.ff.0.0.weight", |
| "transformer.layers.0.4.ff.2.weight", |
| "transformer.layers.1.2.to_q.weight", |
| "transformer.layers.1.2.to_k.weight", |
| "transformer.layers.1.2.to_v.weight", |
| "transformer.layers.1.2.to_out.0.weight", |
| "transformer.layers.1.4.ff.0.0.weight", |
| "transformer.layers.1.4.ff.2.weight", |
| "transformer.layers.2.2.to_q.weight", |
| "transformer.layers.2.2.to_k.weight", |
| "transformer.layers.2.2.to_v.weight", |
| "transformer.layers.2.2.to_out.0.weight", |
| "transformer.layers.2.4.ff.0.0.weight", |
| "transformer.layers.2.4.ff.2.weight", |
| "transformer.layers.3.2.to_q.weight", |
| "transformer.layers.3.2.to_k.weight", |
| "transformer.layers.3.2.to_v.weight", |
| "transformer.layers.3.2.to_out.0.weight", |
| "transformer.layers.3.4.ff.0.0.weight", |
| "transformer.layers.3.4.ff.2.weight", |
| "transformer.layers.4.2.to_q.weight", |
| "transformer.layers.4.2.to_k.weight", |
| "transformer.layers.4.2.to_v.weight", |
| "transformer.layers.4.2.to_out.0.weight", |
| "transformer.layers.4.4.ff.0.0.weight", |
| "transformer.layers.4.4.ff.2.weight", |
| "transformer.layers.5.2.to_q.weight", |
| "transformer.layers.5.2.to_k.weight", |
| "transformer.layers.5.2.to_v.weight", |
| "transformer.layers.5.2.to_out.0.weight", |
| "transformer.layers.5.4.ff.0.0.weight", |
| "transformer.layers.5.4.ff.2.weight", |
| "transformer.layers.6.2.to_q.weight", |
| "transformer.layers.6.2.to_k.weight", |
| "transformer.layers.6.2.to_v.weight", |
| "transformer.layers.6.2.to_out.0.weight", |
| "transformer.layers.6.4.ff.0.0.weight", |
| "transformer.layers.6.4.ff.2.weight", |
| "transformer.layers.7.2.to_q.weight", |
| "transformer.layers.7.2.to_k.weight", |
| "transformer.layers.7.2.to_v.weight", |
| "transformer.layers.7.2.to_out.0.weight", |
| "transformer.layers.7.4.ff.0.0.weight", |
| "transformer.layers.7.4.ff.2.weight", |
| "transformer.layers.8.4.ff.0.0.weight", |
| "transformer.layers.8.4.ff.2.weight", |
| "transformer.layers.9.4.ff.0.0.weight", |
| "transformer.layers.9.4.ff.2.weight", |
| "transformer.layers.10.4.ff.0.0.weight", |
| "transformer.layers.10.4.ff.2.weight", |
| "transformer.layers.11.4.ff.0.0.weight", |
| "transformer.layers.11.4.ff.2.weight", |
| "transformer.layers.12.4.ff.0.0.weight", |
| "transformer.layers.12.4.ff.2.weight", |
| "transformer.layers.13.0.weight", |
| "transformer.layers.13.4.ff.0.0.weight", |
| "transformer.layers.13.4.ff.2.weight", |
| "transformer.layers.14.0.weight", |
| "transformer.layers.14.4.ff.0.0.weight", |
| "transformer.layers.14.4.ff.2.weight", |
| "transformer.layers.15.0.weight", |
| "transformer.layers.15.4.ff.0.0.weight", |
| "transformer.layers.15.4.ff.2.weight", |
| "transformer.layers.16.4.ff.0.0.weight", |
| "transformer.layers.16.4.ff.2.weight", |
| "transformer.layers.17.4.ff.0.0.weight", |
| "transformer.layers.17.4.ff.2.weight", |
| "transformer.layers.18.4.ff.0.0.weight", |
| "transformer.layers.18.4.ff.2.weight", |
| "transformer.layers.19.4.ff.0.0.weight", |
| "transformer.layers.19.4.ff.2.weight", |
| "transformer.layers.20.4.ff.0.0.weight", |
| "transformer.layers.20.4.ff.2.weight", |
| "transformer.layers.21.4.ff.0.0.weight", |
| "transformer.layers.21.4.ff.2.weight", |
| "transformer.layers.22.4.ff.0.0.weight", |
| "transformer.layers.22.4.ff.2.weight", |
| "transformer.layers.23.4.ff.0.0.weight", |
| "transformer.layers.23.4.ff.2.weight", |
| "transformer.layers.24.4.ff.0.0.weight", |
| "transformer.layers.24.4.ff.2.weight", |
| "transformer.layers.25.4.ff.0.0.weight", |
| "transformer.layers.25.4.ff.2.weight" |
| ] |
| }, |
| "total_parameters": 391, |
| "total_elements": 360755948, |
| "param_ranges": { |
| "transformer.time_embed.time_mlp.0.weight": { |
| "min": -0.43027451634407043, |
| "max": 0.2986099123954773, |
| "mean": -0.0025507817044854164, |
| "std": 0.04255499690771103, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 256 |
| ] |
| }, |
| "transformer.time_embed.time_mlp.0.bias": { |
| "min": -0.06311193853616714, |
| "max": 0.10768741369247437, |
| "mean": 0.0006200151983648539, |
| "std": 0.03410356491804123, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.time_embed.time_mlp.2.weight": { |
| "min": -0.41268283128738403, |
| "max": 0.8365557193756104, |
| "mean": -0.00020680355373769999, |
| "std": 0.02410806156694889, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.time_embed.time_mlp.2.bias": { |
| "min": -0.11543754488229752, |
| "max": 0.3218643069267273, |
| "mean": -0.0009378742543049157, |
| "std": 0.019571715965867043, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.text_embed.text_embed.weight": { |
| "min": -2.792863130569458, |
| "max": 2.8707633018493652, |
| "mean": -0.0003630426654126495, |
| "std": 0.6153795719146729, |
| "sparsity": 0.0, |
| "shape": [ |
| 2546, |
| 100 |
| ] |
| }, |
| "transformer.input_embed.proj.weight": { |
| "min": -0.27924296259880066, |
| "max": 0.3817594349384308, |
| "mean": 0.00042336067417636514, |
| "std": 0.042748212814331055, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 300 |
| ] |
| }, |
| "transformer.input_embed.proj.bias": { |
| "min": -0.22243480384349823, |
| "max": 0.20970797538757324, |
| "mean": -0.004494894295930862, |
| "std": 0.04093479365110397, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.input_embed.conv_pos_embed.conv1d.0.weight": { |
| "min": -0.4279620349407196, |
| "max": 0.47544437646865845, |
| "mean": 3.4269442039658315e-06, |
| "std": 0.024507490918040276, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 64, |
| 31 |
| ] |
| }, |
| "transformer.input_embed.conv_pos_embed.conv1d.0.bias": { |
| "min": -0.32538774609565735, |
| "max": 0.15757951140403748, |
| "mean": -0.046732865273952484, |
| "std": 0.05161404609680176, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.input_embed.conv_pos_embed.conv1d.2.weight": { |
| "min": -0.4103900194168091, |
| "max": 0.3545621335506439, |
| "mean": -0.0001282805751543492, |
| "std": 0.02359895221889019, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 64, |
| 31 |
| ] |
| }, |
| "transformer.input_embed.conv_pos_embed.conv1d.2.bias": { |
| "min": -0.22982768714427948, |
| "max": 0.2626851797103882, |
| "mean": -0.029157839715480804, |
| "std": 0.04937523230910301, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.0.1.g": { |
| "min": 0.2546607255935669, |
| "max": 0.8210369348526001, |
| "mean": 0.5255380868911743, |
| "std": 0.08102277666330338, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.0.2.to_q.weight": { |
| "min": -0.296828031539917, |
| "max": 0.2656802833080292, |
| "mean": -0.0004245353629812598, |
| "std": 0.032100748270750046, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.0.2.to_q.bias": { |
| "min": -0.09280094504356384, |
| "max": 0.12531320750713348, |
| "mean": 0.0006500966264866292, |
| "std": 0.025744492188096046, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.0.2.to_k.weight": { |
| "min": -0.2905188202857971, |
| "max": 0.28166285157203674, |
| "mean": -7.521975203417242e-05, |
| "std": 0.030932102352380753, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.0.2.to_k.bias": { |
| "min": -5.9063615798950195, |
| "max": 5.821039199829102, |
| "mean": -0.009349350817501545, |
| "std": 1.2963582277297974, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.0.2.to_v.weight": { |
| "min": -0.4250166118144989, |
| "max": 0.34394335746765137, |
| "mean": 9.808164759306237e-05, |
| "std": 0.02995201013982296, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.0.2.to_v.bias": { |
| "min": -0.02886926755309105, |
| "max": 0.027612265199422836, |
| "mean": -0.0003159886400680989, |
| "std": 0.012566552497446537, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.0.2.to_out.0.weight": { |
| "min": -0.4542844891548157, |
| "max": 0.4484859108924866, |
| "mean": 2.2895628717378713e-05, |
| "std": 0.023853421211242676, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.0.2.to_out.0.bias": { |
| "min": -0.08885892480611801, |
| "max": 0.09123405814170837, |
| "mean": 0.002273206366226077, |
| "std": 0.019519906491041183, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.0.3.g": { |
| "min": 0.26680853962898254, |
| "max": 1.0574053525924683, |
| "mean": 0.5312761068344116, |
| "std": 0.10467371344566345, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.0.4.ff.0.0.weight": { |
| "min": -0.5747479200363159, |
| "max": 0.6086151599884033, |
| "mean": -0.00043056829599663615, |
| "std": 0.03859534114599228, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.0.4.ff.0.0.bias": { |
| "min": -0.18226587772369385, |
| "max": 0.04570382833480835, |
| "mean": -0.029475372284650803, |
| "std": 0.04265210032463074, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.0.4.ff.2.weight": { |
| "min": -1.1669524908065796, |
| "max": 1.6345643997192383, |
| "mean": 0.00032027901033870876, |
| "std": 0.027692919597029686, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.0.4.ff.2.bias": { |
| "min": -0.16255705058574677, |
| "max": 0.20596350729465485, |
| "mean": -0.021122729405760765, |
| "std": 0.0279533751308918, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.1.1.g": { |
| "min": 0.2242354154586792, |
| "max": 0.8446622490882874, |
| "mean": 0.4876382350921631, |
| "std": 0.07536358386278152, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.1.2.to_q.weight": { |
| "min": -0.25569042563438416, |
| "max": 0.3060862720012665, |
| "mean": -8.35508035379462e-06, |
| "std": 0.03346911817789078, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.1.2.to_q.bias": { |
| "min": -0.09542153775691986, |
| "max": 0.11059843748807907, |
| "mean": 6.575271254405379e-05, |
| "std": 0.026967303827404976, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.1.2.to_k.weight": { |
| "min": -0.29741746187210083, |
| "max": 0.2962968945503235, |
| "mean": 5.0992566684726626e-05, |
| "std": 0.03253895416855812, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.1.2.to_k.bias": { |
| "min": -5.170334339141846, |
| "max": 5.090466022491455, |
| "mean": -0.014626836404204369, |
| "std": 1.1584166288375854, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.1.2.to_v.weight": { |
| "min": -0.3447432518005371, |
| "max": 0.3434843122959137, |
| "mean": 7.888684194767848e-05, |
| "std": 0.030058253556489944, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.1.2.to_v.bias": { |
| "min": -0.03619777783751488, |
| "max": 0.033210255205631256, |
| "mean": -0.00014313205610960722, |
| "std": 0.013021216727793217, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.1.2.to_out.0.weight": { |
| "min": -0.31545811891555786, |
| "max": 0.3753635585308075, |
| "mean": -2.0908952137688175e-05, |
| "std": 0.024055080488324165, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.1.2.to_out.0.bias": { |
| "min": -0.10554195195436478, |
| "max": 0.12217912822961807, |
| "mean": -0.001965724630281329, |
| "std": 0.02885899320244789, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.1.3.g": { |
| "min": 0.31185638904571533, |
| "max": 1.1226844787597656, |
| "mean": 0.6664173007011414, |
| "std": 0.09809636324644089, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.1.4.ff.0.0.weight": { |
| "min": -0.8724010586738586, |
| "max": 0.6276066303253174, |
| "mean": 0.0016756090335547924, |
| "std": 0.04743661358952522, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.1.4.ff.0.0.bias": { |
| "min": -0.27133694291114807, |
| "max": 0.034276124089956284, |
| "mean": -0.04661266878247261, |
| "std": 0.04062533751130104, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.1.4.ff.2.weight": { |
| "min": -0.9213826656341553, |
| "max": 0.9645106792449951, |
| "mean": 0.0010220588883385062, |
| "std": 0.040701646357774734, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.1.4.ff.2.bias": { |
| "min": -0.14473342895507812, |
| "max": 0.07504827529191971, |
| "mean": -0.009093794040381908, |
| "std": 0.025712795555591583, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.2.1.g": { |
| "min": 0.23969869315624237, |
| "max": 0.7134895920753479, |
| "mean": 0.4472740888595581, |
| "std": 0.05947508662939072, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.2.2.to_q.weight": { |
| "min": -0.2730574309825897, |
| "max": 0.29789650440216064, |
| "mean": 8.741370038478635e-06, |
| "std": 0.035470616072416306, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.2.2.to_q.bias": { |
| "min": -0.11908062547445297, |
| "max": 0.11852753162384033, |
| "mean": 0.0007502459920942783, |
| "std": 0.027627233415842056, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.2.2.to_k.weight": { |
| "min": -0.2813098430633545, |
| "max": 0.27990853786468506, |
| "mean": -7.670064951526001e-05, |
| "std": 0.03509994596242905, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.2.2.to_k.bias": { |
| "min": -2.512526035308838, |
| "max": 2.5245351791381836, |
| "mean": 0.026777304708957672, |
| "std": 0.58714359998703, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.2.2.to_v.weight": { |
| "min": -0.22134634852409363, |
| "max": 0.2719532251358032, |
| "mean": 2.8086524253012612e-06, |
| "std": 0.030731303617358208, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.2.2.to_v.bias": { |
| "min": -0.03337041661143303, |
| "max": 0.031244885176420212, |
| "mean": 0.0001174571443698369, |
| "std": 0.012399335391819477, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.2.2.to_out.0.weight": { |
| "min": -0.23538419604301453, |
| "max": 0.2318607121706009, |
| "mean": 5.6835913710528985e-05, |
| "std": 0.02569691836833954, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.2.2.to_out.0.bias": { |
| "min": -0.13590897619724274, |
| "max": 0.12807728350162506, |
| "mean": -0.005500740837305784, |
| "std": 0.039980240166187286, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.2.3.g": { |
| "min": 0.3545033931732178, |
| "max": 1.174311876296997, |
| "mean": 0.7105965614318848, |
| "std": 0.10393685102462769, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.2.4.ff.0.0.weight": { |
| "min": -0.6176598072052002, |
| "max": 0.5552863478660583, |
| "mean": 0.001160678919404745, |
| "std": 0.046113625168800354, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.2.4.ff.0.0.bias": { |
| "min": -0.1892954707145691, |
| "max": 0.024854592978954315, |
| "mean": -0.034856364130973816, |
| "std": 0.028640495613217354, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.2.4.ff.2.weight": { |
| "min": -1.1318156719207764, |
| "max": 0.9712402820587158, |
| "mean": 0.0003593153669498861, |
| "std": 0.04234175756573677, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.2.4.ff.2.bias": { |
| "min": -0.5985706448554993, |
| "max": 0.0630447119474411, |
| "mean": -0.004880559165030718, |
| "std": 0.028633911162614822, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.3.1.g": { |
| "min": 0.37550806999206543, |
| "max": 0.942255973815918, |
| "mean": 0.592631459236145, |
| "std": 0.06731508672237396, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.3.2.to_q.weight": { |
| "min": -0.3917059302330017, |
| "max": 0.3694884479045868, |
| "mean": 7.032141002127901e-05, |
| "std": 0.037185318768024445, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.3.2.to_q.bias": { |
| "min": -0.11911240965127945, |
| "max": 0.13666978478431702, |
| "mean": 0.0009285699925385416, |
| "std": 0.029224557802081108, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.3.2.to_k.weight": { |
| "min": -0.6197025179862976, |
| "max": 0.5093265771865845, |
| "mean": 1.5340243407990783e-05, |
| "std": 0.036438170820474625, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.3.2.to_k.bias": { |
| "min": -8.196393966674805, |
| "max": 8.799202919006348, |
| "mean": -0.10933247208595276, |
| "std": 1.7004725933074951, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.3.2.to_v.weight": { |
| "min": -0.2767159938812256, |
| "max": 0.23974454402923584, |
| "mean": 5.235425851424225e-05, |
| "std": 0.03261233866214752, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.3.2.to_v.bias": { |
| "min": -0.051826898008584976, |
| "max": 0.039538491517305374, |
| "mean": 9.016307012643665e-05, |
| "std": 0.012965181842446327, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.3.2.to_out.0.weight": { |
| "min": -0.23091718554496765, |
| "max": 0.23482012748718262, |
| "mean": -2.2171980162966065e-05, |
| "std": 0.029389047995209694, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.3.2.to_out.0.bias": { |
| "min": -0.20455272495746613, |
| "max": 0.10541031509637833, |
| "mean": -0.0040219868533313274, |
| "std": 0.03264109417796135, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.3.3.g": { |
| "min": 0.33964791893959045, |
| "max": 1.0138026475906372, |
| "mean": 0.7007413506507874, |
| "std": 0.0968313068151474, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.3.4.ff.0.0.weight": { |
| "min": -0.5653463006019592, |
| "max": 0.8341253995895386, |
| "mean": 0.0004152161709498614, |
| "std": 0.042294181883335114, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.3.4.ff.0.0.bias": { |
| "min": -0.2122603803873062, |
| "max": 0.03037133999168873, |
| "mean": -0.03219597041606903, |
| "std": 0.026528161019086838, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.3.4.ff.2.weight": { |
| "min": -0.7555134892463684, |
| "max": 0.7197405099868774, |
| "mean": -1.6411166143370792e-05, |
| "std": 0.036835070699453354, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.3.4.ff.2.bias": { |
| "min": -0.2637326717376709, |
| "max": 0.10635162889957428, |
| "mean": -0.003013473004102707, |
| "std": 0.028875315561890602, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.4.1.g": { |
| "min": 0.2841384708881378, |
| "max": 0.6960581541061401, |
| "mean": 0.4994935393333435, |
| "std": 0.046687543392181396, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.4.2.to_q.weight": { |
| "min": -0.27874135971069336, |
| "max": 0.23421625792980194, |
| "mean": -0.0001108625583583489, |
| "std": 0.03875651955604553, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.4.2.to_q.bias": { |
| "min": -0.15407033264636993, |
| "max": 0.12659268081188202, |
| "mean": -0.002232097554951906, |
| "std": 0.03336996212601662, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.4.2.to_k.weight": { |
| "min": -0.41471850872039795, |
| "max": 0.6599792838096619, |
| "mean": -1.8830280168913305e-05, |
| "std": 0.03909522667527199, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.4.2.to_k.bias": { |
| "min": -4.242863178253174, |
| "max": 4.727988243103027, |
| "mean": -0.020436234772205353, |
| "std": 1.0083643198013306, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.4.2.to_v.weight": { |
| "min": -0.24536164104938507, |
| "max": 0.20758995413780212, |
| "mean": 4.39189825556241e-05, |
| "std": 0.0339621901512146, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.4.2.to_v.bias": { |
| "min": -0.03461671993136406, |
| "max": 0.04490647837519646, |
| "mean": -1.8480626749806106e-05, |
| "std": 0.012636142782866955, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.4.2.to_out.0.weight": { |
| "min": -0.20118913054466248, |
| "max": 0.20648600161075592, |
| "mean": -2.914817741839215e-05, |
| "std": 0.031020086258649826, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.4.2.to_out.0.bias": { |
| "min": -0.2001321166753769, |
| "max": 0.11347545683383942, |
| "mean": -0.0028973689768463373, |
| "std": 0.03452814370393753, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.4.3.g": { |
| "min": 0.36694979667663574, |
| "max": 1.0586931705474854, |
| "mean": 0.6705467104911804, |
| "std": 0.06646514683961868, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.4.4.ff.0.0.weight": { |
| "min": -0.39871448278427124, |
| "max": 0.5025006532669067, |
| "mean": -3.831302092294209e-05, |
| "std": 0.041130244731903076, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.4.4.ff.0.0.bias": { |
| "min": -0.12894268333911896, |
| "max": 0.026869317516684532, |
| "mean": -0.030542686581611633, |
| "std": 0.021899448707699776, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.4.4.ff.2.weight": { |
| "min": -0.4495067298412323, |
| "max": 0.43352261185646057, |
| "mean": 7.56321387598291e-05, |
| "std": 0.034890398383140564, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.4.4.ff.2.bias": { |
| "min": -0.26795703172683716, |
| "max": 0.07305809110403061, |
| "mean": -0.0010922406800091267, |
| "std": 0.023138197138905525, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.5.1.g": { |
| "min": 0.2873804569244385, |
| "max": 0.6860803365707397, |
| "mean": 0.5245892405509949, |
| "std": 0.047686196863651276, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.5.2.to_q.weight": { |
| "min": -0.22270891070365906, |
| "max": 0.22395135462284088, |
| "mean": 1.5596267985529266e-05, |
| "std": 0.03894849866628647, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.5.2.to_q.bias": { |
| "min": -0.1365431845188141, |
| "max": 0.1094546914100647, |
| "mean": 0.0002404236583970487, |
| "std": 0.02924003079533577, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.5.2.to_k.weight": { |
| "min": -0.37548792362213135, |
| "max": 0.4377880096435547, |
| "mean": -9.806113666854799e-06, |
| "std": 0.039285749197006226, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.5.2.to_k.bias": { |
| "min": -3.8503658771514893, |
| "max": 5.0051727294921875, |
| "mean": 0.009742870926856995, |
| "std": 0.8458123803138733, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.5.2.to_v.weight": { |
| "min": -0.2231559306383133, |
| "max": 0.22039616107940674, |
| "mean": -2.540778041293379e-07, |
| "std": 0.03440915793180466, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.5.2.to_v.bias": { |
| "min": -0.04364994913339615, |
| "max": 0.03587768226861954, |
| "mean": -0.00025836972054094076, |
| "std": 0.012079192325472832, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.5.2.to_out.0.weight": { |
| "min": -0.21319326758384705, |
| "max": 0.1889532059431076, |
| "mean": -1.7074991774279624e-05, |
| "std": 0.031535569578409195, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.5.2.to_out.0.bias": { |
| "min": -0.18106123805046082, |
| "max": 0.12093079835176468, |
| "mean": -0.0023932361509650946, |
| "std": 0.04127350077033043, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.5.3.g": { |
| "min": 0.422803670167923, |
| "max": 0.9430609345436096, |
| "mean": 0.6627297401428223, |
| "std": 0.05693160742521286, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.5.4.ff.0.0.weight": { |
| "min": -0.37100574374198914, |
| "max": 0.476217657327652, |
| "mean": -8.213143883040175e-05, |
| "std": 0.04088921844959259, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.5.4.ff.0.0.bias": { |
| "min": -0.20879434049129486, |
| "max": 0.027236830443143845, |
| "mean": -0.03024592623114586, |
| "std": 0.021377045661211014, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.5.4.ff.2.weight": { |
| "min": -0.3412210941314697, |
| "max": 0.7347204685211182, |
| "mean": 8.198502473533154e-05, |
| "std": 0.034765854477882385, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.5.4.ff.2.bias": { |
| "min": -0.2404329776763916, |
| "max": 0.05046902596950531, |
| "mean": -0.001188310096040368, |
| "std": 0.020469345152378082, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.6.1.g": { |
| "min": 0.3061925768852234, |
| "max": 0.654449999332428, |
| "mean": 0.5251765251159668, |
| "std": 0.04624079912900925, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.6.2.to_q.weight": { |
| "min": -0.30459216237068176, |
| "max": 0.21765196323394775, |
| "mean": 7.016396557446569e-05, |
| "std": 0.039494771510362625, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.6.2.to_q.bias": { |
| "min": -0.1493639498949051, |
| "max": 0.13124904036521912, |
| "mean": 0.00033865522709675133, |
| "std": 0.03046908602118492, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.6.2.to_k.weight": { |
| "min": -0.2574615776538849, |
| "max": 0.20232746005058289, |
| "mean": 3.111670594080351e-05, |
| "std": 0.03948463872075081, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.6.2.to_k.bias": { |
| "min": -2.339005708694458, |
| "max": 2.378676176071167, |
| "mean": -0.026260126382112503, |
| "std": 0.45006638765335083, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.6.2.to_v.weight": { |
| "min": -0.18878056108951569, |
| "max": 0.2107384204864502, |
| "mean": 3.7163907109061256e-05, |
| "std": 0.034793294966220856, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.6.2.to_v.bias": { |
| "min": -0.031842123717069626, |
| "max": 0.03563522920012474, |
| "mean": -0.00019889514078386128, |
| "std": 0.012288383208215237, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.6.2.to_out.0.weight": { |
| "min": -0.18877452611923218, |
| "max": 0.17039048671722412, |
| "mean": -6.83176185702905e-05, |
| "std": 0.03216997906565666, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.6.2.to_out.0.bias": { |
| "min": -0.13967929780483246, |
| "max": 0.13765227794647217, |
| "mean": -0.0025106696411967278, |
| "std": 0.051296915858983994, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.6.3.g": { |
| "min": 0.4670410752296448, |
| "max": 0.9571460485458374, |
| "mean": 0.668942928314209, |
| "std": 0.052938032895326614, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.6.4.ff.0.0.weight": { |
| "min": -0.32432013750076294, |
| "max": 0.30918803811073303, |
| "mean": -9.502464308752678e-07, |
| "std": 0.040945153683423996, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.6.4.ff.0.0.bias": { |
| "min": -0.12509244680404663, |
| "max": 0.025560801848769188, |
| "mean": -0.03070145845413208, |
| "std": 0.019835734739899635, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.6.4.ff.2.weight": { |
| "min": -0.44014739990234375, |
| "max": 0.44575265049934387, |
| "mean": 9.502484317636117e-05, |
| "std": 0.035118650645017624, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.6.4.ff.2.bias": { |
| "min": -0.22483481466770172, |
| "max": 0.05185456946492195, |
| "mean": -0.0011811171425506473, |
| "std": 0.018479909747838974, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.7.1.g": { |
| "min": 0.33911338448524475, |
| "max": 0.7404670715332031, |
| "mean": 0.5587128400802612, |
| "std": 0.04148301109671593, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.7.2.to_q.weight": { |
| "min": -0.2731746435165405, |
| "max": 0.2787404954433441, |
| "mean": 2.032621341641061e-05, |
| "std": 0.04105671867728233, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.7.2.to_q.bias": { |
| "min": -0.13706564903259277, |
| "max": 0.14011380076408386, |
| "mean": 0.0004902533255517483, |
| "std": 0.026642272248864174, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.7.2.to_k.weight": { |
| "min": -0.49108198285102844, |
| "max": 0.35628437995910645, |
| "mean": 8.894230268197134e-05, |
| "std": 0.040694475173950195, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.7.2.to_k.bias": { |
| "min": -2.300570249557495, |
| "max": 1.7478224039077759, |
| "mean": -0.021113090217113495, |
| "std": 0.5004414319992065, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.7.2.to_v.weight": { |
| "min": -0.21800002455711365, |
| "max": 0.19787649810314178, |
| "mean": -4.053436714457348e-05, |
| "std": 0.034232478588819504, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.7.2.to_v.bias": { |
| "min": -0.04136132448911667, |
| "max": 0.03894467279314995, |
| "mean": -0.0001396951702190563, |
| "std": 0.012888246215879917, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.7.2.to_out.0.weight": { |
| "min": -0.17788128554821014, |
| "max": 0.18331165611743927, |
| "mean": 4.789709782926366e-05, |
| "std": 0.031555820256471634, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.7.2.to_out.0.bias": { |
| "min": -0.1802123337984085, |
| "max": 0.1839253157377243, |
| "mean": -0.0022146895062178373, |
| "std": 0.05485367402434349, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.7.3.g": { |
| "min": 0.47431865334510803, |
| "max": 1.0268715620040894, |
| "mean": 0.6453023552894592, |
| "std": 0.05052410438656807, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.7.4.ff.0.0.weight": { |
| "min": -0.27176976203918457, |
| "max": 0.3096844553947449, |
| "mean": 0.0001122704561566934, |
| "std": 0.040681492537260056, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.7.4.ff.0.0.bias": { |
| "min": -0.10546202212572098, |
| "max": 0.02664944902062416, |
| "mean": -0.02952582947909832, |
| "std": 0.01794532686471939, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.7.4.ff.2.weight": { |
| "min": -0.3392390310764313, |
| "max": 0.3297179937362671, |
| "mean": 5.245600186754018e-05, |
| "std": 0.034412626177072525, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.7.4.ff.2.bias": { |
| "min": -0.181877002120018, |
| "max": 0.042341288179159164, |
| "mean": -0.0010600005043670535, |
| "std": 0.01721755787730217, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.8.1.g": { |
| "min": 0.3254714906215668, |
| "max": 0.6875306367874146, |
| "mean": 0.5112907886505127, |
| "std": 0.03710601106286049, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.8.2.to_q.weight": { |
| "min": -0.23404580354690552, |
| "max": 0.22564062476158142, |
| "mean": -3.628679769462906e-05, |
| "std": 0.03917597234249115, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.8.2.to_q.bias": { |
| "min": -0.11565262079238892, |
| "max": 0.13205118477344513, |
| "mean": 0.00015428723418153822, |
| "std": 0.029200663790106773, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.8.2.to_k.weight": { |
| "min": -0.3531610369682312, |
| "max": 0.28566646575927734, |
| "mean": 7.01215958542889e-06, |
| "std": 0.03924458101391792, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.8.2.to_k.bias": { |
| "min": -4.1371684074401855, |
| "max": 3.5479142665863037, |
| "mean": -0.011608399450778961, |
| "std": 0.6831862926483154, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.8.2.to_v.weight": { |
| "min": -0.21112798154354095, |
| "max": 0.20956169068813324, |
| "mean": 3.4640430385479704e-05, |
| "std": 0.034484706819057465, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.8.2.to_v.bias": { |
| "min": -0.0358961820602417, |
| "max": 0.04827914386987686, |
| "mean": 0.000792390201240778, |
| "std": 0.012867480516433716, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.8.2.to_out.0.weight": { |
| "min": -0.21074581146240234, |
| "max": 0.19335627555847168, |
| "mean": -1.3081223642075201e-06, |
| "std": 0.031695783138275146, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.8.2.to_out.0.bias": { |
| "min": -0.18677425384521484, |
| "max": 0.17732204496860504, |
| "mean": -0.002835639752447605, |
| "std": 0.05864328145980835, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.8.3.g": { |
| "min": 0.4744804799556732, |
| "max": 1.0434356927871704, |
| "mean": 0.6514811515808105, |
| "std": 0.04996025562286377, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.8.4.ff.0.0.weight": { |
| "min": -0.24828216433525085, |
| "max": 0.3291241526603699, |
| "mean": 0.00018075907428283244, |
| "std": 0.04056989774107933, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.8.4.ff.0.0.bias": { |
| "min": -0.12511543929576874, |
| "max": 0.024807237088680267, |
| "mean": -0.03050871379673481, |
| "std": 0.017624877393245697, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.8.4.ff.2.weight": { |
| "min": -0.42125385999679565, |
| "max": 0.4822184443473816, |
| "mean": -1.4134266166365705e-06, |
| "std": 0.03539677709341049, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.8.4.ff.2.bias": { |
| "min": -0.1517147570848465, |
| "max": 0.043470486998558044, |
| "mean": 4.9440553993918e-05, |
| "std": 0.014891887083649635, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.9.1.g": { |
| "min": 0.31546592712402344, |
| "max": 0.6829473972320557, |
| "mean": 0.552940845489502, |
| "std": 0.0407881923019886, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.9.2.to_q.weight": { |
| "min": -0.20658527314662933, |
| "max": 0.2199694663286209, |
| "mean": 3.1865805794950575e-05, |
| "std": 0.03829915076494217, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.9.2.to_q.bias": { |
| "min": -0.1380588412284851, |
| "max": 0.11287239193916321, |
| "mean": 2.8096917958464473e-05, |
| "std": 0.025843404233455658, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.9.2.to_k.weight": { |
| "min": -0.4030921161174774, |
| "max": 0.37124574184417725, |
| "mean": 2.583605601103045e-05, |
| "std": 0.03817982226610184, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.9.2.to_k.bias": { |
| "min": -3.7753050327301025, |
| "max": 2.8720550537109375, |
| "mean": 0.001174271572381258, |
| "std": 0.5172262787818909, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.9.2.to_v.weight": { |
| "min": -0.203634575009346, |
| "max": 0.19783173501491547, |
| "mean": 2.9641731089213863e-05, |
| "std": 0.034296903759241104, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.9.2.to_v.bias": { |
| "min": -0.050782062113285065, |
| "max": 0.039943333715200424, |
| "mean": -0.00042034429498016834, |
| "std": 0.01341927982866764, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.9.2.to_out.0.weight": { |
| "min": -0.19659629464149475, |
| "max": 0.20229847729206085, |
| "mean": -1.2495337614382152e-05, |
| "std": 0.03180486336350441, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.9.2.to_out.0.bias": { |
| "min": -0.19323131442070007, |
| "max": 0.19526611268520355, |
| "mean": -0.002963971346616745, |
| "std": 0.06255338340997696, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.9.3.g": { |
| "min": 0.34901732206344604, |
| "max": 1.0851324796676636, |
| "mean": 0.6672203540802002, |
| "std": 0.055461570620536804, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.9.4.ff.0.0.weight": { |
| "min": -0.22590196132659912, |
| "max": 0.2515060603618622, |
| "mean": 0.0003586675738915801, |
| "std": 0.040759582072496414, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.9.4.ff.0.0.bias": { |
| "min": -0.09132344275712967, |
| "max": 0.043738022446632385, |
| "mean": -0.030089886859059334, |
| "std": 0.017626678571105003, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.9.4.ff.2.weight": { |
| "min": -0.35337263345718384, |
| "max": 0.30428504943847656, |
| "mean": -4.39239593106322e-05, |
| "std": 0.03712212294340134, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.9.4.ff.2.bias": { |
| "min": -0.16189776360988617, |
| "max": 0.06336814165115356, |
| "mean": -8.093340147752315e-05, |
| "std": 0.019419532269239426, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.10.1.g": { |
| "min": 0.3486655354499817, |
| "max": 0.7230467796325684, |
| "mean": 0.5424184799194336, |
| "std": 0.03920904919505119, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.10.2.to_q.weight": { |
| "min": -0.21948550641536713, |
| "max": 0.22342580556869507, |
| "mean": -1.1189426913915668e-05, |
| "std": 0.039230361580848694, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.10.2.to_q.bias": { |
| "min": -0.11856742948293686, |
| "max": 0.17064979672431946, |
| "mean": 0.0002859297674149275, |
| "std": 0.025129808112978935, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.10.2.to_k.weight": { |
| "min": -0.24677123129367828, |
| "max": 0.30096495151519775, |
| "mean": -3.686630952870473e-05, |
| "std": 0.03892983868718147, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.10.2.to_k.bias": { |
| "min": -3.5091044902801514, |
| "max": 3.718792676925659, |
| "mean": 0.01584971882402897, |
| "std": 0.7831407189369202, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.10.2.to_v.weight": { |
| "min": -0.21897639334201813, |
| "max": 0.23756206035614014, |
| "mean": -1.3331029549590312e-05, |
| "std": 0.036302708089351654, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.10.2.to_v.bias": { |
| "min": -0.04721689596772194, |
| "max": 0.05136079713702202, |
| "mean": 0.00047709030332043767, |
| "std": 0.013516037724912167, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.10.2.to_out.0.weight": { |
| "min": -0.2142534703016281, |
| "max": 0.21756578981876373, |
| "mean": 5.647125362884253e-05, |
| "std": 0.03361497074365616, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.10.2.to_out.0.bias": { |
| "min": -0.21157103776931763, |
| "max": 0.23160234093666077, |
| "mean": -0.005100839305669069, |
| "std": 0.06188952922821045, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.10.3.g": { |
| "min": 0.3621511459350586, |
| "max": 1.1025023460388184, |
| "mean": 0.6993520259857178, |
| "std": 0.05383123829960823, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.10.4.ff.0.0.weight": { |
| "min": -0.23499585688114166, |
| "max": 0.2451109141111374, |
| "mean": 0.00046343874419108033, |
| "std": 0.04126851260662079, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.10.4.ff.0.0.bias": { |
| "min": -0.0981631875038147, |
| "max": 0.06831478327512741, |
| "mean": -0.031439878046512604, |
| "std": 0.01814098283648491, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.10.4.ff.2.weight": { |
| "min": -0.3021915853023529, |
| "max": 0.3518403172492981, |
| "mean": -8.213460387196392e-05, |
| "std": 0.04027426242828369, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.10.4.ff.2.bias": { |
| "min": -0.1524408757686615, |
| "max": 0.14984285831451416, |
| "mean": 0.0002571163640823215, |
| "std": 0.02304430864751339, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.11.1.g": { |
| "min": 0.9986459016799927, |
| "max": 1.0047640800476074, |
| "mean": 0.9998321533203125, |
| "std": 0.000813807244412601, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.11.2.to_q.weight": { |
| "min": -0.031263623386621475, |
| "max": 0.03126571327447891, |
| "mean": -1.928813617269043e-05, |
| "std": 0.01804114319384098, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.11.2.to_q.bias": { |
| "min": -0.031225843355059624, |
| "max": 0.030984507873654366, |
| "mean": -0.001084179850295186, |
| "std": 0.01795078068971634, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.11.2.to_k.weight": { |
| "min": -0.031264401972293854, |
| "max": 0.03126936033368111, |
| "mean": 3.5438486065686448e-06, |
| "std": 0.018041551113128662, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.11.2.to_k.bias": { |
| "min": -0.031160537153482437, |
| "max": 0.031171930953860283, |
| "mean": 0.00033398409141227603, |
| "std": 0.01806296594440937, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.11.2.to_v.weight": { |
| "min": 0.0, |
| "max": 0.0, |
| "mean": 0.0, |
| "std": 0.0, |
| "sparsity": 1.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.11.2.to_v.bias": { |
| "min": 0.0, |
| "max": 0.0, |
| "mean": 0.0, |
| "std": 0.0, |
| "sparsity": 1.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.11.2.to_out.0.weight": { |
| "min": 0.0, |
| "max": 0.0, |
| "mean": 0.0, |
| "std": 0.0, |
| "sparsity": 1.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.11.2.to_out.0.bias": { |
| "min": -0.0006154034635983407, |
| "max": 0.00041452725417912006, |
| "mean": 1.3732544630329357e-06, |
| "std": 0.00013773542013950646, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.11.3.g": { |
| "min": 0.9981350898742676, |
| "max": 1.0061345100402832, |
| "mean": 1.0003111362457275, |
| "std": 0.0018558463780209422, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.11.4.ff.0.0.weight": { |
| "min": -0.03275563195347786, |
| "max": 0.032837994396686554, |
| "mean": -6.685876542178448e-06, |
| "std": 0.018042754381895065, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.11.4.ff.0.0.bias": { |
| "min": -0.03275851905345917, |
| "max": 0.03259003907442093, |
| "mean": -0.00013117710477672517, |
| "std": 0.017956379801034927, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.11.4.ff.2.weight": { |
| "min": -0.0011762815993279219, |
| "max": 0.0011538960970938206, |
| "mean": 3.6382635926202056e-07, |
| "std": 0.00021428015315905213, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.11.4.ff.2.bias": { |
| "min": -0.0005257476586848497, |
| "max": 0.0003992951533291489, |
| "mean": 2.2647066089120926e-06, |
| "std": 0.00012679416977334768, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.12.1.g": { |
| "min": 0.3831771910190582, |
| "max": 0.7203002572059631, |
| "mean": 0.5807632207870483, |
| "std": 0.039030127227306366, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.12.2.to_q.weight": { |
| "min": -0.238657608628273, |
| "max": 0.1965981125831604, |
| "mean": 2.6105446522706188e-05, |
| "std": 0.03746547922492027, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.12.2.to_q.bias": { |
| "min": -0.11904074251651764, |
| "max": 0.16665399074554443, |
| "mean": 0.0009819172555580735, |
| "std": 0.027577750384807587, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.12.2.to_k.weight": { |
| "min": -0.2464642971754074, |
| "max": 0.5006471276283264, |
| "mean": -5.0186910812044516e-05, |
| "std": 0.03762289881706238, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.12.2.to_k.bias": { |
| "min": -3.946474552154541, |
| "max": 3.7734150886535645, |
| "mean": -0.0035824859514832497, |
| "std": 0.681806743144989, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.12.2.to_v.weight": { |
| "min": -0.22754359245300293, |
| "max": 0.25217491388320923, |
| "mean": -1.1530558367667254e-05, |
| "std": 0.03743445873260498, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.12.2.to_v.bias": { |
| "min": -0.07175272703170776, |
| "max": 0.08072981238365173, |
| "mean": -0.0005130038480274379, |
| "std": 0.015667041763663292, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.12.2.to_out.0.weight": { |
| "min": -0.22810040414333344, |
| "max": 0.2579977512359619, |
| "mean": -2.8758044209098443e-05, |
| "std": 0.03542134538292885, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.12.2.to_out.0.bias": { |
| "min": -0.20080700516700745, |
| "max": 0.2153109759092331, |
| "mean": -0.005534037947654724, |
| "std": 0.0683637484908104, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.12.3.g": { |
| "min": 0.4053238332271576, |
| "max": 1.1908336877822876, |
| "mean": 0.7380030155181885, |
| "std": 0.05547412484884262, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.12.4.ff.0.0.weight": { |
| "min": -0.22124385833740234, |
| "max": 0.24569396674633026, |
| "mean": 0.0005211688112467527, |
| "std": 0.041335880756378174, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.12.4.ff.0.0.bias": { |
| "min": -0.10345371812582016, |
| "max": 0.024234607815742493, |
| "mean": -0.032675523310899734, |
| "std": 0.018910475075244904, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.12.4.ff.2.weight": { |
| "min": -0.4498954117298126, |
| "max": 0.42273956537246704, |
| "mean": -0.00043416087282821536, |
| "std": 0.04689621180295944, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.12.4.ff.2.bias": { |
| "min": -0.2517021596431732, |
| "max": 0.4706237316131592, |
| "mean": 0.0032027317211031914, |
| "std": 0.04455312713980675, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.13.0.weight": { |
| "min": -0.3169791102409363, |
| "max": 0.3331950604915619, |
| "mean": -2.5209596060449257e-05, |
| "std": 0.021287448704242706, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 2048 |
| ] |
| }, |
| "transformer.layers.13.1.g": { |
| "min": 0.3245099186897278, |
| "max": 0.6862163543701172, |
| "mean": 0.5710394978523254, |
| "std": 0.04481911659240723, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.13.2.to_q.weight": { |
| "min": -0.1645577996969223, |
| "max": 0.17449714243412018, |
| "mean": -4.883324072579853e-05, |
| "std": 0.0331808440387249, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.13.2.to_q.bias": { |
| "min": -0.18696466088294983, |
| "max": 0.14305275678634644, |
| "mean": 4.307446943130344e-05, |
| "std": 0.029701771214604378, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.13.2.to_k.weight": { |
| "min": -0.381203293800354, |
| "max": 0.24647706747055054, |
| "mean": -9.961708201444708e-06, |
| "std": 0.032761868089437485, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.13.2.to_k.bias": { |
| "min": -3.6597650051116943, |
| "max": 3.293627977371216, |
| "mean": -0.014285150915384293, |
| "std": 0.9855467677116394, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.13.2.to_v.weight": { |
| "min": -0.23503181338310242, |
| "max": 0.24772128462791443, |
| "mean": -1.80145725607872e-05, |
| "std": 0.04169723764061928, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.13.2.to_v.bias": { |
| "min": -0.07274845242500305, |
| "max": 0.15466810762882233, |
| "mean": 0.0006658544880338013, |
| "std": 0.025178011506795883, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.13.2.to_out.0.weight": { |
| "min": -0.2665092945098877, |
| "max": 0.2483654022216797, |
| "mean": -1.536182753625326e-05, |
| "std": 0.04013803228735924, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.13.2.to_out.0.bias": { |
| "min": -0.1897987425327301, |
| "max": 0.19495300948619843, |
| "mean": -0.001235135248862207, |
| "std": 0.06669139117002487, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.13.3.g": { |
| "min": 0.32910633087158203, |
| "max": 1.0014653205871582, |
| "mean": 0.7192941308021545, |
| "std": 0.05263138189911842, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.13.4.ff.0.0.weight": { |
| "min": -0.23197908699512482, |
| "max": 0.24564941227436066, |
| "mean": 0.0001828196254791692, |
| "std": 0.04089989513158798, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.13.4.ff.0.0.bias": { |
| "min": -0.11449356377124786, |
| "max": 0.019026821479201317, |
| "mean": -0.042487140744924545, |
| "std": 0.018874552100896835, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.13.4.ff.2.weight": { |
| "min": -0.39025211334228516, |
| "max": 0.40785497426986694, |
| "mean": -2.1506561097339727e-05, |
| "std": 0.04853347688913345, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.13.4.ff.2.bias": { |
| "min": -0.6939337849617004, |
| "max": 0.4130322337150574, |
| "mean": 0.0008477974915876985, |
| "std": 0.06032131612300873, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.14.0.weight": { |
| "min": -0.0010383415501564741, |
| "max": 1.0005052089691162, |
| "mean": 0.00048820566735230386, |
| "std": 0.02208903431892395, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 2048 |
| ] |
| }, |
| "transformer.layers.14.1.g": { |
| "min": 0.9979493021965027, |
| "max": 1.0028773546218872, |
| "mean": 0.9996361136436462, |
| "std": 0.0005558156408369541, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.14.2.to_q.weight": { |
| "min": -0.031263865530490875, |
| "max": 0.03126693516969681, |
| "mean": -2.1029807612649165e-05, |
| "std": 0.018032483756542206, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.14.2.to_q.bias": { |
| "min": -0.031225642189383507, |
| "max": 0.031231923028826714, |
| "mean": -0.000677043863106519, |
| "std": 0.017827108502388, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.14.2.to_k.weight": { |
| "min": -0.031264521181583405, |
| "max": 0.03126373142004013, |
| "mean": -8.835060725687072e-06, |
| "std": 0.018031509593129158, |
| "sparsity": 9.5367431640625e-07, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.14.2.to_k.bias": { |
| "min": -0.031228171661496162, |
| "max": 0.031247133389115334, |
| "mean": -0.0007299243006855249, |
| "std": 0.017942015081644058, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.14.2.to_v.weight": { |
| "min": 0.0, |
| "max": 0.0, |
| "mean": 0.0, |
| "std": 0.0, |
| "sparsity": 1.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.14.2.to_v.bias": { |
| "min": 0.0, |
| "max": 0.0, |
| "mean": 0.0, |
| "std": 0.0, |
| "sparsity": 1.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.14.2.to_out.0.weight": { |
| "min": 0.0, |
| "max": 0.0, |
| "mean": 0.0, |
| "std": 0.0, |
| "sparsity": 1.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.14.2.to_out.0.bias": { |
| "min": -0.00041853971197269857, |
| "max": 0.0003325868456158787, |
| "mean": -3.1447550554730697e-06, |
| "std": 0.0001163617562269792, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.14.3.g": { |
| "min": 0.9978473782539368, |
| "max": 1.0059432983398438, |
| "mean": 0.9999491572380066, |
| "std": 0.001859705662354827, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.14.4.ff.0.0.weight": { |
| "min": -0.03244250267744064, |
| "max": 0.0323757641017437, |
| "mean": -1.7303907497989712e-06, |
| "std": 0.018027959391474724, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.14.4.ff.0.0.bias": { |
| "min": -0.032130636274814606, |
| "max": 0.03116563893854618, |
| "mean": -0.0003740063984878361, |
| "std": 0.01804370991885662, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.14.4.ff.2.weight": { |
| "min": -0.0012801800621673465, |
| "max": 0.0011148827616125345, |
| "mean": -8.956569672591286e-07, |
| "std": 0.00020970198966097087, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.14.4.ff.2.bias": { |
| "min": -0.0003423716698307544, |
| "max": 0.00029734382405877113, |
| "mean": -3.7682302718167193e-06, |
| "std": 0.00010476629540789872, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.15.0.weight": { |
| "min": -0.2343551367521286, |
| "max": 0.2724533975124359, |
| "mean": 6.777756425435655e-06, |
| "std": 0.018809394910931587, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 2048 |
| ] |
| }, |
| "transformer.layers.15.1.g": { |
| "min": 0.32130253314971924, |
| "max": 0.6949947476387024, |
| "mean": 0.5816991329193115, |
| "std": 0.04608374834060669, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.15.2.to_q.weight": { |
| "min": -0.18193963170051575, |
| "max": 0.19776132702827454, |
| "mean": -1.1586925211304333e-05, |
| "std": 0.033183593302965164, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.15.2.to_q.bias": { |
| "min": -0.16079005599021912, |
| "max": 0.12958164513111115, |
| "mean": -0.0010761492885649204, |
| "std": 0.03415785729885101, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.15.2.to_k.weight": { |
| "min": -0.33248019218444824, |
| "max": 0.31138068437576294, |
| "mean": -1.0150852176593617e-05, |
| "std": 0.0322343148291111, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.15.2.to_k.bias": { |
| "min": -7.811703681945801, |
| "max": 8.77199935913086, |
| "mean": 0.09351971745491028, |
| "std": 1.6208088397979736, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.15.2.to_v.weight": { |
| "min": -0.23395448923110962, |
| "max": 0.24196705222129822, |
| "mean": 4.150588938500732e-05, |
| "std": 0.04085612669587135, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.15.2.to_v.bias": { |
| "min": -0.07599986344575882, |
| "max": 0.06580105423927307, |
| "mean": 0.0004830547550227493, |
| "std": 0.019416898488998413, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.15.2.to_out.0.weight": { |
| "min": -0.24603235721588135, |
| "max": 0.23429378867149353, |
| "mean": -3.1053496059030294e-06, |
| "std": 0.039430778473615646, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.15.2.to_out.0.bias": { |
| "min": -0.16335651278495789, |
| "max": 0.16123652458190918, |
| "mean": 0.001627025194466114, |
| "std": 0.0652812197804451, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.15.3.g": { |
| "min": 0.5569519996643066, |
| "max": 0.9448988437652588, |
| "mean": 0.712960422039032, |
| "std": 0.040366582572460175, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.15.4.ff.0.0.weight": { |
| "min": -0.2282220721244812, |
| "max": 0.255278617143631, |
| "mean": -4.5689772377954796e-05, |
| "std": 0.04057461395859718, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.15.4.ff.0.0.bias": { |
| "min": -0.1351189911365509, |
| "max": 0.02213732711970806, |
| "mean": -0.04135933890938759, |
| "std": 0.018408460542559624, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.15.4.ff.2.weight": { |
| "min": -0.4218907952308655, |
| "max": 0.39247259497642517, |
| "mean": -4.45842306362465e-06, |
| "std": 0.04778381064534187, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.15.4.ff.2.bias": { |
| "min": -0.6081869602203369, |
| "max": 0.6523037552833557, |
| "mean": 0.0015862288419157267, |
| "std": 0.0568697564303875, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.16.0.weight": { |
| "min": -0.25164270401000977, |
| "max": 0.32068535685539246, |
| "mean": -6.094380296417512e-06, |
| "std": 0.019612763077020645, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 2048 |
| ] |
| }, |
| "transformer.layers.16.1.g": { |
| "min": 0.3596651554107666, |
| "max": 0.6836386322975159, |
| "mean": 0.5707623958587646, |
| "std": 0.04307318106293678, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.16.2.to_q.weight": { |
| "min": -0.2204994410276413, |
| "max": 0.17691564559936523, |
| "mean": -3.469674993539229e-05, |
| "std": 0.034298643469810486, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.16.2.to_q.bias": { |
| "min": -0.16342805325984955, |
| "max": 0.23329652845859528, |
| "mean": 0.0003627383557613939, |
| "std": 0.03284167870879173, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.16.2.to_k.weight": { |
| "min": -0.26406827569007874, |
| "max": 0.24012491106987, |
| "mean": -5.2815768867731094e-05, |
| "std": 0.033897485584020615, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.16.2.to_k.bias": { |
| "min": -4.859966278076172, |
| "max": 5.0964674949646, |
| "mean": 0.04393793269991875, |
| "std": 1.230094075202942, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.16.2.to_v.weight": { |
| "min": -0.2463618665933609, |
| "max": 0.250487744808197, |
| "mean": 7.235530210891739e-05, |
| "std": 0.04398680850863457, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.16.2.to_v.bias": { |
| "min": -0.06267692148685455, |
| "max": 0.054532695561647415, |
| "mean": 0.000642440456431359, |
| "std": 0.017191536724567413, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.16.2.to_out.0.weight": { |
| "min": -0.2865088880062103, |
| "max": 0.272175133228302, |
| "mean": -5.016334762331098e-05, |
| "std": 0.042984914034605026, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.16.2.to_out.0.bias": { |
| "min": -0.1612030565738678, |
| "max": 0.1705736219882965, |
| "mean": -0.0028862706385552883, |
| "std": 0.05929599329829216, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.16.3.g": { |
| "min": 0.5197344422340393, |
| "max": 0.9341347813606262, |
| "mean": 0.7135534286499023, |
| "std": 0.03866534307599068, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.16.4.ff.0.0.weight": { |
| "min": -0.23811031877994537, |
| "max": 0.24873413145542145, |
| "mean": 0.0004648095346055925, |
| "std": 0.04045360907912254, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.16.4.ff.0.0.bias": { |
| "min": -0.14526499807834625, |
| "max": 0.041103385388851166, |
| "mean": -0.03970393165946007, |
| "std": 0.02056412398815155, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.16.4.ff.2.weight": { |
| "min": -0.5330354571342468, |
| "max": 0.5828887820243835, |
| "mean": 5.7578072301112115e-06, |
| "std": 0.04885943979024887, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.16.4.ff.2.bias": { |
| "min": -0.5195844769477844, |
| "max": 0.4939325749874115, |
| "mean": 0.002366485306993127, |
| "std": 0.05347662419080734, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.17.0.weight": { |
| "min": -0.273802787065506, |
| "max": 0.3155968487262726, |
| "mean": 2.01077523342974e-06, |
| "std": 0.02004941552877426, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 2048 |
| ] |
| }, |
| "transformer.layers.17.1.g": { |
| "min": 0.36614885926246643, |
| "max": 0.7128685116767883, |
| "mean": 0.5932222604751587, |
| "std": 0.04609934985637665, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.17.2.to_q.weight": { |
| "min": -0.21103930473327637, |
| "max": 0.19931277632713318, |
| "mean": 3.062984978896566e-05, |
| "std": 0.03486598655581474, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.17.2.to_q.bias": { |
| "min": -0.18738499283790588, |
| "max": 0.20401518046855927, |
| "mean": 0.0009546762448735535, |
| "std": 0.031527843326330185, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.17.2.to_k.weight": { |
| "min": -0.2900747060775757, |
| "max": 0.3402419686317444, |
| "mean": -4.711254223366268e-05, |
| "std": 0.03458685800433159, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.17.2.to_k.bias": { |
| "min": -3.881408214569092, |
| "max": 3.3909339904785156, |
| "mean": 0.014485932886600494, |
| "std": 0.8588526248931885, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.17.2.to_v.weight": { |
| "min": -0.22469207644462585, |
| "max": 0.2501186430454254, |
| "mean": -3.7895424611633644e-06, |
| "std": 0.04222952574491501, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.17.2.to_v.bias": { |
| "min": -0.05539275333285332, |
| "max": 0.046729691326618195, |
| "mean": -1.6585952835157514e-05, |
| "std": 0.01585092395544052, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.17.2.to_out.0.weight": { |
| "min": -0.29304519295692444, |
| "max": 0.2904603183269501, |
| "mean": -7.356060450547375e-06, |
| "std": 0.04194435849785805, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.17.2.to_out.0.bias": { |
| "min": -0.1251518875360489, |
| "max": 0.25925886631011963, |
| "mean": -0.0032416037283837795, |
| "std": 0.05317998677492142, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.17.3.g": { |
| "min": 0.4565337300300598, |
| "max": 0.8454437851905823, |
| "mean": 0.7055786848068237, |
| "std": 0.035420604050159454, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.17.4.ff.0.0.weight": { |
| "min": -0.5118804574012756, |
| "max": 0.34804508090019226, |
| "mean": 0.00034280645195394754, |
| "std": 0.040198732167482376, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.17.4.ff.0.0.bias": { |
| "min": -0.18613915145397186, |
| "max": 0.03958306089043617, |
| "mean": -0.03939869999885559, |
| "std": 0.021371137350797653, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.17.4.ff.2.weight": { |
| "min": -0.544677197933197, |
| "max": 0.5565076470375061, |
| "mean": -7.158219523262233e-05, |
| "std": 0.050734151154756546, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.17.4.ff.2.bias": { |
| "min": -0.5122924447059631, |
| "max": 0.6649084091186523, |
| "mean": 0.002443553414195776, |
| "std": 0.04954148083925247, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.18.0.weight": { |
| "min": -0.3326307237148285, |
| "max": 0.2655903100967407, |
| "mean": 3.417561856622342e-06, |
| "std": 0.01938662678003311, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 2048 |
| ] |
| }, |
| "transformer.layers.18.1.g": { |
| "min": 0.32189854979515076, |
| "max": 0.7676428556442261, |
| "mean": 0.6510834097862244, |
| "std": 0.045412834733724594, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.18.2.to_q.weight": { |
| "min": -0.24963903427124023, |
| "max": 0.21975325047969818, |
| "mean": -2.1360538084991276e-06, |
| "std": 0.03650053218007088, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.18.2.to_q.bias": { |
| "min": -0.3272336423397064, |
| "max": 0.2872598171234131, |
| "mean": -0.000690902175847441, |
| "std": 0.038575589656829834, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.18.2.to_k.weight": { |
| "min": -0.3104217052459717, |
| "max": 0.3704308867454529, |
| "mean": 6.501967436634004e-05, |
| "std": 0.03624104708433151, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.18.2.to_k.bias": { |
| "min": -4.723941326141357, |
| "max": 5.815830707550049, |
| "mean": 0.03795095533132553, |
| "std": 1.4143388271331787, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.18.2.to_v.weight": { |
| "min": -0.22184839844703674, |
| "max": 0.20582044124603271, |
| "mean": -7.514897151850164e-05, |
| "std": 0.04248502478003502, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.18.2.to_v.bias": { |
| "min": -0.07779642939567566, |
| "max": 0.05152571201324463, |
| "mean": -0.0009286667918786407, |
| "std": 0.016416585072875023, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.18.2.to_out.0.weight": { |
| "min": -0.33085039258003235, |
| "max": 0.3292792737483978, |
| "mean": -4.624932898877887e-06, |
| "std": 0.04279141500592232, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.18.2.to_out.0.bias": { |
| "min": -0.2850324511528015, |
| "max": 0.11214210838079453, |
| "mean": -0.0012058319989591837, |
| "std": 0.04702144116163254, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.18.3.g": { |
| "min": 0.48610714077949524, |
| "max": 0.8880516886711121, |
| "mean": 0.7374852299690247, |
| "std": 0.038454823195934296, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.18.4.ff.0.0.weight": { |
| "min": -0.3623265027999878, |
| "max": 0.2744399905204773, |
| "mean": 5.1268329116282985e-05, |
| "std": 0.04064424708485603, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.18.4.ff.0.0.bias": { |
| "min": -0.24789389967918396, |
| "max": 0.046399183571338654, |
| "mean": -0.0392770953476429, |
| "std": 0.023303059861063957, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.18.4.ff.2.weight": { |
| "min": -0.6267192959785461, |
| "max": 0.5975406765937805, |
| "mean": -6.142957136034966e-05, |
| "std": 0.05311630666255951, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.18.4.ff.2.bias": { |
| "min": -0.7102671265602112, |
| "max": 0.2661624252796173, |
| "mean": 0.0009175186860375106, |
| "std": 0.05124976858496666, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.19.0.weight": { |
| "min": -0.3433755040168762, |
| "max": 0.30368152260780334, |
| "mean": 1.5963701116561424e-07, |
| "std": 0.01913503371179104, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 2048 |
| ] |
| }, |
| "transformer.layers.19.1.g": { |
| "min": 0.34989097714424133, |
| "max": 0.7839252948760986, |
| "mean": 0.6388714909553528, |
| "std": 0.04933994635939598, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.19.2.to_q.weight": { |
| "min": -0.2052908092737198, |
| "max": 0.20688343048095703, |
| "mean": -5.992479418637231e-05, |
| "std": 0.03769543766975403, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.19.2.to_q.bias": { |
| "min": -0.2588193416595459, |
| "max": 0.26830655336380005, |
| "mean": -0.00039892495260573924, |
| "std": 0.044624269008636475, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.19.2.to_k.weight": { |
| "min": -0.3542262613773346, |
| "max": 0.3225662410259247, |
| "mean": -6.961288363527274e-06, |
| "std": 0.037203844636678696, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.19.2.to_k.bias": { |
| "min": -5.267129421234131, |
| "max": 4.20892858505249, |
| "mean": -0.02641383744776249, |
| "std": 1.0074299573898315, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.19.2.to_v.weight": { |
| "min": -0.23873740434646606, |
| "max": 0.24359266459941864, |
| "mean": -2.525941454223357e-05, |
| "std": 0.04320967569947243, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.19.2.to_v.bias": { |
| "min": -0.06238892674446106, |
| "max": 0.056785948574543, |
| "mean": 0.0003448878414928913, |
| "std": 0.014156854711472988, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.19.2.to_out.0.weight": { |
| "min": -0.4372255206108093, |
| "max": 0.37362250685691833, |
| "mean": 1.442125540052075e-05, |
| "std": 0.04412117227911949, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.19.2.to_out.0.bias": { |
| "min": -0.09657814353704453, |
| "max": 0.1761663407087326, |
| "mean": -0.0006602209759876132, |
| "std": 0.03516199812293053, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.19.3.g": { |
| "min": 0.4218268096446991, |
| "max": 1.070821762084961, |
| "mean": 0.7484229803085327, |
| "std": 0.042183347046375275, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.19.4.ff.0.0.weight": { |
| "min": -0.26658228039741516, |
| "max": 0.2970208525657654, |
| "mean": -7.946729601826519e-05, |
| "std": 0.04080420732498169, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.19.4.ff.0.0.bias": { |
| "min": -0.18536308407783508, |
| "max": 0.04367092251777649, |
| "mean": -0.0368281751871109, |
| "std": 0.02562659978866577, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.19.4.ff.2.weight": { |
| "min": -0.4574359655380249, |
| "max": 0.4870511591434479, |
| "mean": 4.341827298048884e-05, |
| "std": 0.05420948192477226, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.19.4.ff.2.bias": { |
| "min": -0.2867445945739746, |
| "max": 0.5520338416099548, |
| "mean": -0.0008801904041320086, |
| "std": 0.04785289987921715, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.20.0.weight": { |
| "min": -0.2927917540073395, |
| "max": 0.32283690571784973, |
| "mean": 6.15146973359515e-06, |
| "std": 0.019968591630458832, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 2048 |
| ] |
| }, |
| "transformer.layers.20.1.g": { |
| "min": 0.2908935844898224, |
| "max": 0.7611098885536194, |
| "mean": 0.6508486270904541, |
| "std": 0.05218230187892914, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.20.2.to_q.weight": { |
| "min": -0.2437344491481781, |
| "max": 0.2615884840488434, |
| "mean": -6.006965122651309e-06, |
| "std": 0.03961160406470299, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.20.2.to_q.bias": { |
| "min": -0.267729252576828, |
| "max": 0.20025481283664703, |
| "mean": -0.0008811865700408816, |
| "std": 0.05178782343864441, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.20.2.to_k.weight": { |
| "min": -0.27242225408554077, |
| "max": 0.25395235419273376, |
| "mean": 4.551842721411958e-06, |
| "std": 0.03870858997106552, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.20.2.to_k.bias": { |
| "min": -12.978915214538574, |
| "max": 15.964410781860352, |
| "mean": 0.033282238990068436, |
| "std": 1.9907665252685547, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.20.2.to_v.weight": { |
| "min": -0.20730799436569214, |
| "max": 0.22610057890415192, |
| "mean": -7.21659671398811e-05, |
| "std": 0.0405535064637661, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.20.2.to_v.bias": { |
| "min": -0.06938357651233673, |
| "max": 0.06327643245458603, |
| "mean": 0.00015629694098606706, |
| "std": 0.014746708795428276, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.20.2.to_out.0.weight": { |
| "min": -0.46517401933670044, |
| "max": 0.320604145526886, |
| "mean": 1.968832475540694e-05, |
| "std": 0.040588606148958206, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.20.2.to_out.0.bias": { |
| "min": -0.06422771513462067, |
| "max": 0.11537671089172363, |
| "mean": 0.0011921785771846771, |
| "std": 0.024717185646295547, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.20.3.g": { |
| "min": 0.3747236132621765, |
| "max": 0.9333999156951904, |
| "mean": 0.7509297132492065, |
| "std": 0.04027929529547691, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.20.4.ff.0.0.weight": { |
| "min": -0.2798289656639099, |
| "max": 0.2732216715812683, |
| "mean": -0.00016840256284922361, |
| "std": 0.0409947969019413, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.20.4.ff.0.0.bias": { |
| "min": -0.19888785481452942, |
| "max": 0.05115103721618652, |
| "mean": -0.0320354662835598, |
| "std": 0.025122012943029404, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.20.4.ff.2.weight": { |
| "min": -0.6584249138832092, |
| "max": 0.5358718037605286, |
| "mean": -4.888750845566392e-05, |
| "std": 0.05284606292843819, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.20.4.ff.2.bias": { |
| "min": -0.19323143362998962, |
| "max": 0.5829473733901978, |
| "mean": -0.0005128738121129572, |
| "std": 0.041099581867456436, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.21.0.weight": { |
| "min": -0.41776803135871887, |
| "max": 0.3719577491283417, |
| "mean": 6.155986739031505e-06, |
| "std": 0.02162076160311699, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 2048 |
| ] |
| }, |
| "transformer.layers.21.1.g": { |
| "min": 0.2142574042081833, |
| "max": 0.7495372891426086, |
| "mean": 0.6495493054389954, |
| "std": 0.05440565198659897, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.21.2.to_q.weight": { |
| "min": -0.2096448391675949, |
| "max": 0.1958194077014923, |
| "mean": 4.026427632197738e-05, |
| "std": 0.03946169093251228, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.21.2.to_q.bias": { |
| "min": -0.3297625780105591, |
| "max": 0.25971850752830505, |
| "mean": -0.003232162445783615, |
| "std": 0.05629448592662811, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.21.2.to_k.weight": { |
| "min": -0.2059866487979889, |
| "max": 0.25485166907310486, |
| "mean": 5.424032860901207e-05, |
| "std": 0.0385642871260643, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.21.2.to_k.bias": { |
| "min": -6.252347469329834, |
| "max": 6.942240238189697, |
| "mean": 0.0483565516769886, |
| "std": 1.3863071203231812, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.21.2.to_v.weight": { |
| "min": -0.20988626778125763, |
| "max": 0.23036901652812958, |
| "mean": -5.1103716032230295e-06, |
| "std": 0.04131251201033592, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.21.2.to_v.bias": { |
| "min": -0.04387219622731209, |
| "max": 0.036041487008333206, |
| "mean": 6.907794158905745e-07, |
| "std": 0.012801294215023518, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.21.2.to_out.0.weight": { |
| "min": -0.3976612091064453, |
| "max": 0.3448401689529419, |
| "mean": -5.557302574743517e-05, |
| "std": 0.04238886013627052, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.21.2.to_out.0.bias": { |
| "min": -0.055147796869277954, |
| "max": 0.06285040080547333, |
| "mean": 0.00036463249125517905, |
| "std": 0.018676765263080597, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.21.3.g": { |
| "min": 0.3504812717437744, |
| "max": 1.0465654134750366, |
| "mean": 0.7894250154495239, |
| "std": 0.048819400370121, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.21.4.ff.0.0.weight": { |
| "min": -0.3335842788219452, |
| "max": 0.3860694169998169, |
| "mean": -0.00016952167788986117, |
| "std": 0.041479866951704025, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.21.4.ff.0.0.bias": { |
| "min": -0.15750724077224731, |
| "max": 0.05909515544772148, |
| "mean": -0.03184274956583977, |
| "std": 0.025149760767817497, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.21.4.ff.2.weight": { |
| "min": -0.6963300704956055, |
| "max": 0.4692156910896301, |
| "mean": -8.906715083867311e-05, |
| "std": 0.05179242789745331, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.21.4.ff.2.bias": { |
| "min": -0.2484702616930008, |
| "max": 0.32900601625442505, |
| "mean": -0.0002533062652219087, |
| "std": 0.041455697268247604, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.22.0.weight": { |
| "min": -0.2871420085430145, |
| "max": 0.35027819871902466, |
| "mean": -2.14410374610452e-06, |
| "std": 0.024236002936959267, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 2048 |
| ] |
| }, |
| "transformer.layers.22.1.g": { |
| "min": 0.1965855360031128, |
| "max": 0.7816711664199829, |
| "mean": 0.6702626943588257, |
| "std": 0.05871051922440529, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.22.2.to_q.weight": { |
| "min": -0.22907193005084991, |
| "max": 0.23129022121429443, |
| "mean": -1.9948049157392234e-05, |
| "std": 0.040437448769807816, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.22.2.to_q.bias": { |
| "min": -0.22012899816036224, |
| "max": 0.24119356274604797, |
| "mean": 0.0007787380600348115, |
| "std": 0.0558554045855999, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.22.2.to_k.weight": { |
| "min": -0.21590574085712433, |
| "max": 0.22671166062355042, |
| "mean": -7.169770105974749e-05, |
| "std": 0.03937356546521187, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.22.2.to_k.bias": { |
| "min": -8.917876243591309, |
| "max": 9.080994606018066, |
| "mean": -0.001221940852701664, |
| "std": 1.850203514099121, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.22.2.to_v.weight": { |
| "min": -0.2697039842605591, |
| "max": 0.2592160999774933, |
| "mean": 4.3639320210786536e-05, |
| "std": 0.03840581700205803, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.22.2.to_v.bias": { |
| "min": -0.057751722633838654, |
| "max": 0.05785238742828369, |
| "mean": 0.0003506582579575479, |
| "std": 0.014723116531968117, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.22.2.to_out.0.weight": { |
| "min": -0.26493385434150696, |
| "max": 0.28856679797172546, |
| "mean": -6.166309321997687e-05, |
| "std": 0.0390719398856163, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.22.2.to_out.0.bias": { |
| "min": -0.04392173886299133, |
| "max": 0.037354789674282074, |
| "mean": -9.023403254104778e-05, |
| "std": 0.013362305238842964, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.22.3.g": { |
| "min": 0.339423805475235, |
| "max": 1.0940691232681274, |
| "mean": 0.8637771010398865, |
| "std": 0.06392761319875717, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.22.4.ff.0.0.weight": { |
| "min": -0.4232974052429199, |
| "max": 0.418984055519104, |
| "mean": 0.0003126158844679594, |
| "std": 0.043500375002622604, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.22.4.ff.0.0.bias": { |
| "min": -0.2149772197008133, |
| "max": 0.1709900051355362, |
| "mean": -0.02949333004653454, |
| "std": 0.03195162117481232, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.22.4.ff.2.weight": { |
| "min": -0.5998314619064331, |
| "max": 0.5601617097854614, |
| "mean": -0.00015080120647326112, |
| "std": 0.053445130586624146, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.22.4.ff.2.bias": { |
| "min": -0.17873013019561768, |
| "max": 0.3772476017475128, |
| "mean": 0.001360590336844325, |
| "std": 0.03732540085911751, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.23.0.weight": { |
| "min": -0.39441367983818054, |
| "max": 0.36907026171684265, |
| "mean": 3.6978712159907445e-05, |
| "std": 0.02861737459897995, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 2048 |
| ] |
| }, |
| "transformer.layers.23.1.g": { |
| "min": 0.2903454899787903, |
| "max": 0.8293581604957581, |
| "mean": 0.7055460214614868, |
| "std": 0.0678996667265892, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.23.2.to_q.weight": { |
| "min": -0.9263197779655457, |
| "max": 1.0265021324157715, |
| "mean": -2.6120340407942422e-05, |
| "std": 0.04762475937604904, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.23.2.to_q.bias": { |
| "min": -0.8798882961273193, |
| "max": 0.8164214491844177, |
| "mean": -0.0003084776981268078, |
| "std": 0.09563522785902023, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.23.2.to_k.weight": { |
| "min": -0.2697499990463257, |
| "max": 0.24099533259868622, |
| "mean": -2.2782449377700686e-05, |
| "std": 0.03895165026187897, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.23.2.to_k.bias": { |
| "min": -23.775798797607422, |
| "max": 22.882915496826172, |
| "mean": -0.09193148463964462, |
| "std": 4.075654983520508, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.23.2.to_v.weight": { |
| "min": -0.22787398099899292, |
| "max": 0.24508967995643616, |
| "mean": -2.5707324311952107e-05, |
| "std": 0.038637157529592514, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.23.2.to_v.bias": { |
| "min": -0.06037602946162224, |
| "max": 0.04592515528202057, |
| "mean": -0.00014296159497462213, |
| "std": 0.01469582598656416, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.23.2.to_out.0.weight": { |
| "min": -0.33830153942108154, |
| "max": 0.3749238848686218, |
| "mean": 7.406164513668045e-06, |
| "std": 0.04081294313073158, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.23.2.to_out.0.bias": { |
| "min": -0.04650312289595604, |
| "max": 0.19583187997341156, |
| "mean": 0.00027365636196918786, |
| "std": 0.01356838084757328, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.23.3.g": { |
| "min": 0.374420702457428, |
| "max": 1.1316319704055786, |
| "mean": 0.8900625109672546, |
| "std": 0.0640411525964737, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.23.4.ff.0.0.weight": { |
| "min": -0.4478547275066376, |
| "max": 0.5426859259605408, |
| "mean": 2.472557571309153e-05, |
| "std": 0.045565616339445114, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.23.4.ff.0.0.bias": { |
| "min": -0.22437457740306854, |
| "max": 0.08822718262672424, |
| "mean": -0.03203187137842178, |
| "std": 0.037792954593896866, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.23.4.ff.2.weight": { |
| "min": -0.7258070111274719, |
| "max": 0.689643919467926, |
| "mean": 3.430668584769592e-05, |
| "std": 0.05177781358361244, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.23.4.ff.2.bias": { |
| "min": -0.17477792501449585, |
| "max": 0.2187574803829193, |
| "mean": 4.145095590502024e-05, |
| "std": 0.03179146349430084, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.24.0.weight": { |
| "min": -0.34034836292266846, |
| "max": 0.37395596504211426, |
| "mean": 4.299964348319918e-05, |
| "std": 0.034139711409807205, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 2048 |
| ] |
| }, |
| "transformer.layers.24.1.g": { |
| "min": 0.3176548182964325, |
| "max": 1.2885946035385132, |
| "mean": 0.6015164256095886, |
| "std": 0.08361472934484482, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.24.2.to_q.weight": { |
| "min": -0.2833174467086792, |
| "max": 0.2604674696922302, |
| "mean": -2.836968405972584e-06, |
| "std": 0.0359807163476944, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.24.2.to_q.bias": { |
| "min": -0.23581622540950775, |
| "max": 0.20569506287574768, |
| "mean": 0.00023786764359101653, |
| "std": 0.05603973567485809, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.24.2.to_k.weight": { |
| "min": -0.4355442523956299, |
| "max": 0.3252858817577362, |
| "mean": 2.4317849238286726e-05, |
| "std": 0.03413137421011925, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.24.2.to_k.bias": { |
| "min": -5.551609039306641, |
| "max": 7.321235179901123, |
| "mean": -0.00739276222884655, |
| "std": 0.7000025510787964, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.24.2.to_v.weight": { |
| "min": -0.3440183103084564, |
| "max": 0.36360201239585876, |
| "mean": 0.00010336286504752934, |
| "std": 0.04782794788479805, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.24.2.to_v.bias": { |
| "min": -0.07385823875665665, |
| "max": 0.060460202395915985, |
| "mean": 0.0009339989046566188, |
| "std": 0.014948051422834396, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.24.2.to_out.0.weight": { |
| "min": -0.2559783458709717, |
| "max": 0.2868276536464691, |
| "mean": 4.447174433153123e-06, |
| "std": 0.041554734110832214, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.24.2.to_out.0.bias": { |
| "min": -0.05538920685648918, |
| "max": 0.06289947777986526, |
| "mean": 0.0001379675231873989, |
| "std": 0.007169328164309263, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.24.3.g": { |
| "min": 0.4936121106147766, |
| "max": 1.2250889539718628, |
| "mean": 1.0134532451629639, |
| "std": 0.11746872216463089, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.24.4.ff.0.0.weight": { |
| "min": -1.0936273336410522, |
| "max": 1.0470186471939087, |
| "mean": -4.9267873691860586e-05, |
| "std": 0.05240849778056145, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.24.4.ff.0.0.bias": { |
| "min": -0.22356118261814117, |
| "max": 0.17290450632572174, |
| "mean": -0.0272555910050869, |
| "std": 0.03637368604540825, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.24.4.ff.2.weight": { |
| "min": -0.8851283192634583, |
| "max": 0.9232462048530579, |
| "mean": -0.00014597055269405246, |
| "std": 0.05328158289194107, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.24.4.ff.2.bias": { |
| "min": -0.17127959430217743, |
| "max": 0.38031327724456787, |
| "mean": 0.0033715758472681046, |
| "std": 0.03991725295782089, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.25.0.weight": { |
| "min": -0.7775956392288208, |
| "max": 0.7237375378608704, |
| "mean": 1.8900283976108767e-05, |
| "std": 0.04616079851984978, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 2048 |
| ] |
| }, |
| "transformer.layers.25.1.g": { |
| "min": 0.3385881781578064, |
| "max": 1.4302620887756348, |
| "mean": 0.9483721852302551, |
| "std": 0.20680920779705048, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.25.2.to_q.weight": { |
| "min": -1.745700478553772, |
| "max": 1.704361081123352, |
| "mean": 0.0002272462734254077, |
| "std": 0.15868504345417023, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.25.2.to_q.bias": { |
| "min": -1.2009143829345703, |
| "max": 1.1011698246002197, |
| "mean": -0.009547756053507328, |
| "std": 0.20407216250896454, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.25.2.to_k.weight": { |
| "min": -0.4211972653865814, |
| "max": 0.42695388197898865, |
| "mean": 6.460870645241812e-05, |
| "std": 0.04801572859287262, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.25.2.to_k.bias": { |
| "min": -19.76715850830078, |
| "max": 19.56202507019043, |
| "mean": -0.24856510758399963, |
| "std": 4.78177547454834, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.25.2.to_v.weight": { |
| "min": -0.32400673627853394, |
| "max": 0.4385600686073303, |
| "mean": -1.1902460755663924e-05, |
| "std": 0.046161260455846786, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.25.2.to_v.bias": { |
| "min": -0.0341118723154068, |
| "max": 0.03712359443306923, |
| "mean": 0.0006423432496376336, |
| "std": 0.012920627370476723, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.25.2.to_out.0.weight": { |
| "min": -0.7036018371582031, |
| "max": 0.6655198335647583, |
| "mean": 4.3310083128744736e-05, |
| "std": 0.057881489396095276, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.25.2.to_out.0.bias": { |
| "min": -0.07232781499624252, |
| "max": 0.06761610507965088, |
| "mean": -0.00013295613462105393, |
| "std": 0.012923309579491615, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.25.3.g": { |
| "min": 0.38030096888542175, |
| "max": 1.391922116279602, |
| "mean": 1.066575527191162, |
| "std": 0.21970626711845398, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.25.4.ff.0.0.weight": { |
| "min": -0.6164926886558533, |
| "max": 0.717415988445282, |
| "mean": 0.00011193109094165266, |
| "std": 0.05802033841609955, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.25.4.ff.0.0.bias": { |
| "min": -0.21991026401519775, |
| "max": 0.22539444267749786, |
| "mean": 0.006232057698071003, |
| "std": 0.049761686474084854, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.25.4.ff.2.weight": { |
| "min": -0.6297550797462463, |
| "max": 0.8893491625785828, |
| "mean": 1.1787104085669853e-05, |
| "std": 0.023527706041932106, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.25.4.ff.2.bias": { |
| "min": -0.5073941349983215, |
| "max": 0.47446364164352417, |
| "mean": -0.00302139762789011, |
| "std": 0.06935760378837585, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.norm_out.g": { |
| "min": 0.5377801060676575, |
| "max": 1.1812876462936401, |
| "mean": 0.7827885746955872, |
| "std": 0.09896031767129898, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.proj_out.weight": { |
| "min": -0.2672213613986969, |
| "max": 0.21292650699615479, |
| "mean": -0.00022339042334351689, |
| "std": 0.05399598926305771, |
| "sparsity": 0.0, |
| "shape": [ |
| 100, |
| 1024 |
| ] |
| }, |
| "transformer.proj_out.bias": { |
| "min": -0.23833607137203217, |
| "max": 0.014835306443274021, |
| "mean": -0.04396972805261612, |
| "std": 0.03436173498630524, |
| "sparsity": 0.0, |
| "shape": [ |
| 100 |
| ] |
| } |
| } |
| } |