{ "layer_types": { "transformer": 391 }, "parameter_counts": { "transformer.time_embed.time_mlp.0.weight": 262144, "transformer.time_embed.time_mlp.0.bias": 1024, "transformer.time_embed.time_mlp.2.weight": 1048576, "transformer.time_embed.time_mlp.2.bias": 1024, "transformer.text_embed.text_embed.weight": 254600, "transformer.input_embed.proj.weight": 307200, "transformer.input_embed.proj.bias": 1024, "transformer.input_embed.conv_pos_embed.conv1d.0.weight": 2031616, "transformer.input_embed.conv_pos_embed.conv1d.0.bias": 1024, "transformer.input_embed.conv_pos_embed.conv1d.2.weight": 2031616, "transformer.input_embed.conv_pos_embed.conv1d.2.bias": 1024, "transformer.layers.0.1.g": 1024, "transformer.layers.0.2.to_q.weight": 1048576, "transformer.layers.0.2.to_q.bias": 1024, "transformer.layers.0.2.to_k.weight": 1048576, "transformer.layers.0.2.to_k.bias": 1024, "transformer.layers.0.2.to_v.weight": 1048576, "transformer.layers.0.2.to_v.bias": 1024, "transformer.layers.0.2.to_out.0.weight": 1048576, "transformer.layers.0.2.to_out.0.bias": 1024, "transformer.layers.0.3.g": 1024, "transformer.layers.0.4.ff.0.0.weight": 4194304, "transformer.layers.0.4.ff.0.0.bias": 4096, "transformer.layers.0.4.ff.2.weight": 4194304, "transformer.layers.0.4.ff.2.bias": 1024, "transformer.layers.1.1.g": 1024, "transformer.layers.1.2.to_q.weight": 1048576, "transformer.layers.1.2.to_q.bias": 1024, "transformer.layers.1.2.to_k.weight": 1048576, "transformer.layers.1.2.to_k.bias": 1024, "transformer.layers.1.2.to_v.weight": 1048576, "transformer.layers.1.2.to_v.bias": 1024, "transformer.layers.1.2.to_out.0.weight": 1048576, "transformer.layers.1.2.to_out.0.bias": 1024, "transformer.layers.1.3.g": 1024, "transformer.layers.1.4.ff.0.0.weight": 4194304, "transformer.layers.1.4.ff.0.0.bias": 4096, "transformer.layers.1.4.ff.2.weight": 4194304, "transformer.layers.1.4.ff.2.bias": 1024, "transformer.layers.2.1.g": 1024, "transformer.layers.2.2.to_q.weight": 1048576, "transformer.layers.2.2.to_q.bias": 1024, "transformer.layers.2.2.to_k.weight": 1048576, "transformer.layers.2.2.to_k.bias": 1024, "transformer.layers.2.2.to_v.weight": 1048576, "transformer.layers.2.2.to_v.bias": 1024, "transformer.layers.2.2.to_out.0.weight": 1048576, "transformer.layers.2.2.to_out.0.bias": 1024, "transformer.layers.2.3.g": 1024, "transformer.layers.2.4.ff.0.0.weight": 4194304, "transformer.layers.2.4.ff.0.0.bias": 4096, "transformer.layers.2.4.ff.2.weight": 4194304, "transformer.layers.2.4.ff.2.bias": 1024, "transformer.layers.3.1.g": 1024, "transformer.layers.3.2.to_q.weight": 1048576, "transformer.layers.3.2.to_q.bias": 1024, "transformer.layers.3.2.to_k.weight": 1048576, "transformer.layers.3.2.to_k.bias": 1024, "transformer.layers.3.2.to_v.weight": 1048576, "transformer.layers.3.2.to_v.bias": 1024, "transformer.layers.3.2.to_out.0.weight": 1048576, "transformer.layers.3.2.to_out.0.bias": 1024, "transformer.layers.3.3.g": 1024, "transformer.layers.3.4.ff.0.0.weight": 4194304, "transformer.layers.3.4.ff.0.0.bias": 4096, "transformer.layers.3.4.ff.2.weight": 4194304, "transformer.layers.3.4.ff.2.bias": 1024, "transformer.layers.4.1.g": 1024, "transformer.layers.4.2.to_q.weight": 1048576, "transformer.layers.4.2.to_q.bias": 1024, "transformer.layers.4.2.to_k.weight": 1048576, "transformer.layers.4.2.to_k.bias": 1024, "transformer.layers.4.2.to_v.weight": 1048576, "transformer.layers.4.2.to_v.bias": 1024, "transformer.layers.4.2.to_out.0.weight": 1048576, "transformer.layers.4.2.to_out.0.bias": 1024, "transformer.layers.4.3.g": 1024, "transformer.layers.4.4.ff.0.0.weight": 4194304, "transformer.layers.4.4.ff.0.0.bias": 4096, "transformer.layers.4.4.ff.2.weight": 4194304, "transformer.layers.4.4.ff.2.bias": 1024, "transformer.layers.5.1.g": 1024, "transformer.layers.5.2.to_q.weight": 1048576, "transformer.layers.5.2.to_q.bias": 1024, "transformer.layers.5.2.to_k.weight": 1048576, "transformer.layers.5.2.to_k.bias": 1024, "transformer.layers.5.2.to_v.weight": 1048576, "transformer.layers.5.2.to_v.bias": 1024, "transformer.layers.5.2.to_out.0.weight": 1048576, "transformer.layers.5.2.to_out.0.bias": 1024, "transformer.layers.5.3.g": 1024, "transformer.layers.5.4.ff.0.0.weight": 4194304, "transformer.layers.5.4.ff.0.0.bias": 4096, "transformer.layers.5.4.ff.2.weight": 4194304, "transformer.layers.5.4.ff.2.bias": 1024, "transformer.layers.6.1.g": 1024, "transformer.layers.6.2.to_q.weight": 1048576, "transformer.layers.6.2.to_q.bias": 1024, "transformer.layers.6.2.to_k.weight": 1048576, "transformer.layers.6.2.to_k.bias": 1024, "transformer.layers.6.2.to_v.weight": 1048576, "transformer.layers.6.2.to_v.bias": 1024, "transformer.layers.6.2.to_out.0.weight": 1048576, "transformer.layers.6.2.to_out.0.bias": 1024, "transformer.layers.6.3.g": 1024, "transformer.layers.6.4.ff.0.0.weight": 4194304, "transformer.layers.6.4.ff.0.0.bias": 4096, "transformer.layers.6.4.ff.2.weight": 4194304, "transformer.layers.6.4.ff.2.bias": 1024, "transformer.layers.7.1.g": 1024, "transformer.layers.7.2.to_q.weight": 1048576, "transformer.layers.7.2.to_q.bias": 1024, "transformer.layers.7.2.to_k.weight": 1048576, "transformer.layers.7.2.to_k.bias": 1024, "transformer.layers.7.2.to_v.weight": 1048576, "transformer.layers.7.2.to_v.bias": 1024, "transformer.layers.7.2.to_out.0.weight": 1048576, "transformer.layers.7.2.to_out.0.bias": 1024, "transformer.layers.7.3.g": 1024, "transformer.layers.7.4.ff.0.0.weight": 4194304, "transformer.layers.7.4.ff.0.0.bias": 4096, "transformer.layers.7.4.ff.2.weight": 4194304, "transformer.layers.7.4.ff.2.bias": 1024, "transformer.layers.8.1.g": 1024, "transformer.layers.8.2.to_q.weight": 1048576, "transformer.layers.8.2.to_q.bias": 1024, "transformer.layers.8.2.to_k.weight": 1048576, "transformer.layers.8.2.to_k.bias": 1024, "transformer.layers.8.2.to_v.weight": 1048576, "transformer.layers.8.2.to_v.bias": 1024, "transformer.layers.8.2.to_out.0.weight": 1048576, "transformer.layers.8.2.to_out.0.bias": 1024, "transformer.layers.8.3.g": 1024, "transformer.layers.8.4.ff.0.0.weight": 4194304, "transformer.layers.8.4.ff.0.0.bias": 4096, "transformer.layers.8.4.ff.2.weight": 4194304, "transformer.layers.8.4.ff.2.bias": 1024, "transformer.layers.9.1.g": 1024, "transformer.layers.9.2.to_q.weight": 1048576, "transformer.layers.9.2.to_q.bias": 1024, "transformer.layers.9.2.to_k.weight": 1048576, "transformer.layers.9.2.to_k.bias": 1024, "transformer.layers.9.2.to_v.weight": 1048576, "transformer.layers.9.2.to_v.bias": 1024, "transformer.layers.9.2.to_out.0.weight": 1048576, "transformer.layers.9.2.to_out.0.bias": 1024, "transformer.layers.9.3.g": 1024, "transformer.layers.9.4.ff.0.0.weight": 4194304, "transformer.layers.9.4.ff.0.0.bias": 4096, "transformer.layers.9.4.ff.2.weight": 4194304, "transformer.layers.9.4.ff.2.bias": 1024, "transformer.layers.10.1.g": 1024, "transformer.layers.10.2.to_q.weight": 1048576, "transformer.layers.10.2.to_q.bias": 1024, "transformer.layers.10.2.to_k.weight": 1048576, "transformer.layers.10.2.to_k.bias": 1024, "transformer.layers.10.2.to_v.weight": 1048576, "transformer.layers.10.2.to_v.bias": 1024, "transformer.layers.10.2.to_out.0.weight": 1048576, "transformer.layers.10.2.to_out.0.bias": 1024, "transformer.layers.10.3.g": 1024, "transformer.layers.10.4.ff.0.0.weight": 4194304, "transformer.layers.10.4.ff.0.0.bias": 4096, "transformer.layers.10.4.ff.2.weight": 4194304, "transformer.layers.10.4.ff.2.bias": 1024, "transformer.layers.11.1.g": 1024, "transformer.layers.11.2.to_q.weight": 1048576, "transformer.layers.11.2.to_q.bias": 1024, "transformer.layers.11.2.to_k.weight": 1048576, "transformer.layers.11.2.to_k.bias": 1024, "transformer.layers.11.2.to_v.weight": 1048576, "transformer.layers.11.2.to_v.bias": 1024, "transformer.layers.11.2.to_out.0.weight": 1048576, "transformer.layers.11.2.to_out.0.bias": 1024, "transformer.layers.11.3.g": 1024, "transformer.layers.11.4.ff.0.0.weight": 4194304, "transformer.layers.11.4.ff.0.0.bias": 4096, "transformer.layers.11.4.ff.2.weight": 4194304, "transformer.layers.11.4.ff.2.bias": 1024, "transformer.layers.12.1.g": 1024, "transformer.layers.12.2.to_q.weight": 1048576, "transformer.layers.12.2.to_q.bias": 1024, "transformer.layers.12.2.to_k.weight": 1048576, "transformer.layers.12.2.to_k.bias": 1024, "transformer.layers.12.2.to_v.weight": 1048576, "transformer.layers.12.2.to_v.bias": 1024, "transformer.layers.12.2.to_out.0.weight": 1048576, "transformer.layers.12.2.to_out.0.bias": 1024, "transformer.layers.12.3.g": 1024, "transformer.layers.12.4.ff.0.0.weight": 4194304, "transformer.layers.12.4.ff.0.0.bias": 4096, "transformer.layers.12.4.ff.2.weight": 4194304, "transformer.layers.12.4.ff.2.bias": 1024, "transformer.layers.13.0.weight": 2097152, "transformer.layers.13.1.g": 1024, "transformer.layers.13.2.to_q.weight": 1048576, "transformer.layers.13.2.to_q.bias": 1024, "transformer.layers.13.2.to_k.weight": 1048576, "transformer.layers.13.2.to_k.bias": 1024, "transformer.layers.13.2.to_v.weight": 1048576, "transformer.layers.13.2.to_v.bias": 1024, "transformer.layers.13.2.to_out.0.weight": 1048576, "transformer.layers.13.2.to_out.0.bias": 1024, "transformer.layers.13.3.g": 1024, "transformer.layers.13.4.ff.0.0.weight": 4194304, "transformer.layers.13.4.ff.0.0.bias": 4096, "transformer.layers.13.4.ff.2.weight": 4194304, "transformer.layers.13.4.ff.2.bias": 1024, "transformer.layers.14.0.weight": 2097152, "transformer.layers.14.1.g": 1024, "transformer.layers.14.2.to_q.weight": 1048576, "transformer.layers.14.2.to_q.bias": 1024, "transformer.layers.14.2.to_k.weight": 1048576, "transformer.layers.14.2.to_k.bias": 1024, "transformer.layers.14.2.to_v.weight": 1048576, "transformer.layers.14.2.to_v.bias": 1024, "transformer.layers.14.2.to_out.0.weight": 1048576, "transformer.layers.14.2.to_out.0.bias": 1024, "transformer.layers.14.3.g": 1024, "transformer.layers.14.4.ff.0.0.weight": 4194304, "transformer.layers.14.4.ff.0.0.bias": 4096, "transformer.layers.14.4.ff.2.weight": 4194304, "transformer.layers.14.4.ff.2.bias": 1024, "transformer.layers.15.0.weight": 2097152, "transformer.layers.15.1.g": 1024, "transformer.layers.15.2.to_q.weight": 1048576, "transformer.layers.15.2.to_q.bias": 1024, "transformer.layers.15.2.to_k.weight": 1048576, "transformer.layers.15.2.to_k.bias": 1024, "transformer.layers.15.2.to_v.weight": 1048576, "transformer.layers.15.2.to_v.bias": 1024, "transformer.layers.15.2.to_out.0.weight": 1048576, "transformer.layers.15.2.to_out.0.bias": 1024, "transformer.layers.15.3.g": 1024, "transformer.layers.15.4.ff.0.0.weight": 4194304, "transformer.layers.15.4.ff.0.0.bias": 4096, "transformer.layers.15.4.ff.2.weight": 4194304, "transformer.layers.15.4.ff.2.bias": 1024, "transformer.layers.16.0.weight": 2097152, "transformer.layers.16.1.g": 1024, "transformer.layers.16.2.to_q.weight": 1048576, "transformer.layers.16.2.to_q.bias": 1024, "transformer.layers.16.2.to_k.weight": 1048576, "transformer.layers.16.2.to_k.bias": 1024, "transformer.layers.16.2.to_v.weight": 1048576, "transformer.layers.16.2.to_v.bias": 1024, "transformer.layers.16.2.to_out.0.weight": 1048576, "transformer.layers.16.2.to_out.0.bias": 1024, "transformer.layers.16.3.g": 1024, "transformer.layers.16.4.ff.0.0.weight": 4194304, "transformer.layers.16.4.ff.0.0.bias": 4096, "transformer.layers.16.4.ff.2.weight": 4194304, "transformer.layers.16.4.ff.2.bias": 1024, "transformer.layers.17.0.weight": 2097152, "transformer.layers.17.1.g": 1024, "transformer.layers.17.2.to_q.weight": 1048576, "transformer.layers.17.2.to_q.bias": 1024, "transformer.layers.17.2.to_k.weight": 1048576, "transformer.layers.17.2.to_k.bias": 1024, "transformer.layers.17.2.to_v.weight": 1048576, "transformer.layers.17.2.to_v.bias": 1024, "transformer.layers.17.2.to_out.0.weight": 1048576, "transformer.layers.17.2.to_out.0.bias": 1024, "transformer.layers.17.3.g": 1024, "transformer.layers.17.4.ff.0.0.weight": 4194304, "transformer.layers.17.4.ff.0.0.bias": 4096, "transformer.layers.17.4.ff.2.weight": 4194304, "transformer.layers.17.4.ff.2.bias": 1024, "transformer.layers.18.0.weight": 2097152, "transformer.layers.18.1.g": 1024, "transformer.layers.18.2.to_q.weight": 1048576, "transformer.layers.18.2.to_q.bias": 1024, "transformer.layers.18.2.to_k.weight": 1048576, "transformer.layers.18.2.to_k.bias": 1024, "transformer.layers.18.2.to_v.weight": 1048576, "transformer.layers.18.2.to_v.bias": 1024, "transformer.layers.18.2.to_out.0.weight": 1048576, "transformer.layers.18.2.to_out.0.bias": 1024, "transformer.layers.18.3.g": 1024, "transformer.layers.18.4.ff.0.0.weight": 4194304, "transformer.layers.18.4.ff.0.0.bias": 4096, "transformer.layers.18.4.ff.2.weight": 4194304, "transformer.layers.18.4.ff.2.bias": 1024, "transformer.layers.19.0.weight": 2097152, "transformer.layers.19.1.g": 1024, "transformer.layers.19.2.to_q.weight": 1048576, "transformer.layers.19.2.to_q.bias": 1024, "transformer.layers.19.2.to_k.weight": 1048576, "transformer.layers.19.2.to_k.bias": 1024, "transformer.layers.19.2.to_v.weight": 1048576, "transformer.layers.19.2.to_v.bias": 1024, "transformer.layers.19.2.to_out.0.weight": 1048576, "transformer.layers.19.2.to_out.0.bias": 1024, "transformer.layers.19.3.g": 1024, "transformer.layers.19.4.ff.0.0.weight": 4194304, "transformer.layers.19.4.ff.0.0.bias": 4096, "transformer.layers.19.4.ff.2.weight": 4194304, "transformer.layers.19.4.ff.2.bias": 1024, "transformer.layers.20.0.weight": 2097152, "transformer.layers.20.1.g": 1024, "transformer.layers.20.2.to_q.weight": 1048576, "transformer.layers.20.2.to_q.bias": 1024, "transformer.layers.20.2.to_k.weight": 1048576, "transformer.layers.20.2.to_k.bias": 1024, "transformer.layers.20.2.to_v.weight": 1048576, "transformer.layers.20.2.to_v.bias": 1024, "transformer.layers.20.2.to_out.0.weight": 1048576, "transformer.layers.20.2.to_out.0.bias": 1024, "transformer.layers.20.3.g": 1024, "transformer.layers.20.4.ff.0.0.weight": 4194304, "transformer.layers.20.4.ff.0.0.bias": 4096, "transformer.layers.20.4.ff.2.weight": 4194304, "transformer.layers.20.4.ff.2.bias": 1024, "transformer.layers.21.0.weight": 2097152, "transformer.layers.21.1.g": 1024, "transformer.layers.21.2.to_q.weight": 1048576, "transformer.layers.21.2.to_q.bias": 1024, "transformer.layers.21.2.to_k.weight": 1048576, "transformer.layers.21.2.to_k.bias": 1024, "transformer.layers.21.2.to_v.weight": 1048576, "transformer.layers.21.2.to_v.bias": 1024, "transformer.layers.21.2.to_out.0.weight": 1048576, "transformer.layers.21.2.to_out.0.bias": 1024, "transformer.layers.21.3.g": 1024, "transformer.layers.21.4.ff.0.0.weight": 4194304, "transformer.layers.21.4.ff.0.0.bias": 4096, "transformer.layers.21.4.ff.2.weight": 4194304, "transformer.layers.21.4.ff.2.bias": 1024, "transformer.layers.22.0.weight": 2097152, "transformer.layers.22.1.g": 1024, "transformer.layers.22.2.to_q.weight": 1048576, "transformer.layers.22.2.to_q.bias": 1024, "transformer.layers.22.2.to_k.weight": 1048576, "transformer.layers.22.2.to_k.bias": 1024, "transformer.layers.22.2.to_v.weight": 1048576, "transformer.layers.22.2.to_v.bias": 1024, "transformer.layers.22.2.to_out.0.weight": 1048576, "transformer.layers.22.2.to_out.0.bias": 1024, "transformer.layers.22.3.g": 1024, "transformer.layers.22.4.ff.0.0.weight": 4194304, "transformer.layers.22.4.ff.0.0.bias": 4096, "transformer.layers.22.4.ff.2.weight": 4194304, "transformer.layers.22.4.ff.2.bias": 1024, "transformer.layers.23.0.weight": 2097152, "transformer.layers.23.1.g": 1024, "transformer.layers.23.2.to_q.weight": 1048576, "transformer.layers.23.2.to_q.bias": 1024, "transformer.layers.23.2.to_k.weight": 1048576, "transformer.layers.23.2.to_k.bias": 1024, "transformer.layers.23.2.to_v.weight": 1048576, "transformer.layers.23.2.to_v.bias": 1024, "transformer.layers.23.2.to_out.0.weight": 1048576, "transformer.layers.23.2.to_out.0.bias": 1024, "transformer.layers.23.3.g": 1024, "transformer.layers.23.4.ff.0.0.weight": 4194304, "transformer.layers.23.4.ff.0.0.bias": 4096, "transformer.layers.23.4.ff.2.weight": 4194304, "transformer.layers.23.4.ff.2.bias": 1024, "transformer.layers.24.0.weight": 2097152, "transformer.layers.24.1.g": 1024, "transformer.layers.24.2.to_q.weight": 1048576, "transformer.layers.24.2.to_q.bias": 1024, "transformer.layers.24.2.to_k.weight": 1048576, "transformer.layers.24.2.to_k.bias": 1024, "transformer.layers.24.2.to_v.weight": 1048576, "transformer.layers.24.2.to_v.bias": 1024, "transformer.layers.24.2.to_out.0.weight": 1048576, "transformer.layers.24.2.to_out.0.bias": 1024, "transformer.layers.24.3.g": 1024, "transformer.layers.24.4.ff.0.0.weight": 4194304, "transformer.layers.24.4.ff.0.0.bias": 4096, "transformer.layers.24.4.ff.2.weight": 4194304, "transformer.layers.24.4.ff.2.bias": 1024, "transformer.layers.25.0.weight": 2097152, "transformer.layers.25.1.g": 1024, "transformer.layers.25.2.to_q.weight": 1048576, "transformer.layers.25.2.to_q.bias": 1024, "transformer.layers.25.2.to_k.weight": 1048576, "transformer.layers.25.2.to_k.bias": 1024, "transformer.layers.25.2.to_v.weight": 1048576, "transformer.layers.25.2.to_v.bias": 1024, "transformer.layers.25.2.to_out.0.weight": 1048576, "transformer.layers.25.2.to_out.0.bias": 1024, "transformer.layers.25.3.g": 1024, "transformer.layers.25.4.ff.0.0.weight": 4194304, "transformer.layers.25.4.ff.0.0.bias": 4096, "transformer.layers.25.4.ff.2.weight": 4194304, "transformer.layers.25.4.ff.2.bias": 1024, "transformer.norm_out.g": 1024, "transformer.proj_out.weight": 102400, "transformer.proj_out.bias": 100 }, "important_layers": [ "transformer.time_embed.time_mlp.0.weight", "transformer.time_embed.time_mlp.2.weight", "transformer.text_embed.text_embed.weight", "transformer.input_embed.proj.weight", "transformer.input_embed.conv_pos_embed.conv1d.0.weight", "transformer.input_embed.conv_pos_embed.conv1d.2.weight", "transformer.layers.0.2.to_q.weight", "transformer.layers.0.2.to_k.weight", "transformer.layers.0.2.to_v.weight", "transformer.layers.0.2.to_out.0.weight", "transformer.layers.0.4.ff.0.0.weight", "transformer.layers.0.4.ff.2.weight", "transformer.layers.1.2.to_q.weight", "transformer.layers.1.2.to_k.weight", "transformer.layers.1.2.to_v.weight", "transformer.layers.1.2.to_out.0.weight", "transformer.layers.1.4.ff.0.0.weight", "transformer.layers.1.4.ff.2.weight", "transformer.layers.2.2.to_q.weight", "transformer.layers.2.2.to_k.weight", "transformer.layers.2.2.to_v.weight", "transformer.layers.2.2.to_out.0.weight", "transformer.layers.2.4.ff.0.0.weight", "transformer.layers.2.4.ff.2.weight", "transformer.layers.3.2.to_q.weight", "transformer.layers.3.2.to_k.weight", "transformer.layers.3.2.to_v.weight", "transformer.layers.3.2.to_out.0.weight", "transformer.layers.3.4.ff.0.0.weight", "transformer.layers.3.4.ff.2.weight", "transformer.layers.4.2.to_q.weight", "transformer.layers.4.2.to_k.weight", "transformer.layers.4.2.to_v.weight", "transformer.layers.4.2.to_out.0.weight", "transformer.layers.4.4.ff.0.0.weight", "transformer.layers.4.4.ff.2.weight", "transformer.layers.5.2.to_q.weight", "transformer.layers.5.2.to_k.weight", "transformer.layers.5.2.to_v.weight", "transformer.layers.5.2.to_out.0.weight", "transformer.layers.5.4.ff.0.0.weight", "transformer.layers.5.4.ff.2.weight", "transformer.layers.6.2.to_q.weight", "transformer.layers.6.2.to_k.weight", "transformer.layers.6.2.to_v.weight", "transformer.layers.6.2.to_out.0.weight", "transformer.layers.6.4.ff.0.0.weight", "transformer.layers.6.4.ff.2.weight", "transformer.layers.7.2.to_q.weight", "transformer.layers.7.2.to_k.weight", "transformer.layers.7.2.to_v.weight", "transformer.layers.7.2.to_out.0.weight", "transformer.layers.7.4.ff.0.0.weight", "transformer.layers.7.4.ff.2.weight", "transformer.layers.8.4.ff.0.0.weight", "transformer.layers.8.4.ff.2.weight", "transformer.layers.9.4.ff.0.0.weight", "transformer.layers.9.4.ff.2.weight", "transformer.layers.10.4.ff.0.0.weight", "transformer.layers.10.4.ff.2.weight", "transformer.layers.11.4.ff.0.0.weight", "transformer.layers.11.4.ff.2.weight", "transformer.layers.12.4.ff.0.0.weight", "transformer.layers.12.4.ff.2.weight", "transformer.layers.13.0.weight", "transformer.layers.13.4.ff.0.0.weight", "transformer.layers.13.4.ff.2.weight", "transformer.layers.14.0.weight", "transformer.layers.14.4.ff.0.0.weight", "transformer.layers.14.4.ff.2.weight", "transformer.layers.15.0.weight", "transformer.layers.15.4.ff.0.0.weight", "transformer.layers.15.4.ff.2.weight", "transformer.layers.16.4.ff.0.0.weight", "transformer.layers.16.4.ff.2.weight", "transformer.layers.17.4.ff.0.0.weight", "transformer.layers.17.4.ff.2.weight", "transformer.layers.18.4.ff.0.0.weight", "transformer.layers.18.4.ff.2.weight", "transformer.layers.19.4.ff.0.0.weight", "transformer.layers.19.4.ff.2.weight", "transformer.layers.20.4.ff.0.0.weight", "transformer.layers.20.4.ff.2.weight", "transformer.layers.21.4.ff.0.0.weight", "transformer.layers.21.4.ff.2.weight", "transformer.layers.22.4.ff.0.0.weight", "transformer.layers.22.4.ff.2.weight", "transformer.layers.23.4.ff.0.0.weight", "transformer.layers.23.4.ff.2.weight", "transformer.layers.24.4.ff.0.0.weight", "transformer.layers.24.4.ff.2.weight", "transformer.layers.25.4.ff.0.0.weight", "transformer.layers.25.4.ff.2.weight" ], "bottleneck_layers": [], "recommendations": { "focus_layers": [ "transformer.time_embed.time_mlp.0.weight", "transformer.time_embed.time_mlp.2.weight", "transformer.text_embed.text_embed.weight", "transformer.input_embed.proj.weight", "transformer.input_embed.conv_pos_embed.conv1d.0.weight", "transformer.input_embed.conv_pos_embed.conv1d.2.weight", "transformer.layers.0.2.to_q.weight", "transformer.layers.0.2.to_k.weight", "transformer.layers.0.2.to_v.weight", "transformer.layers.0.2.to_out.0.weight", "transformer.layers.0.4.ff.0.0.weight", "transformer.layers.0.4.ff.2.weight", "transformer.layers.1.2.to_q.weight", "transformer.layers.1.2.to_k.weight", "transformer.layers.1.2.to_v.weight", "transformer.layers.1.2.to_out.0.weight", "transformer.layers.1.4.ff.0.0.weight", "transformer.layers.1.4.ff.2.weight", "transformer.layers.2.2.to_q.weight", "transformer.layers.2.2.to_k.weight", "transformer.layers.2.2.to_v.weight", "transformer.layers.2.2.to_out.0.weight", "transformer.layers.2.4.ff.0.0.weight", "transformer.layers.2.4.ff.2.weight", "transformer.layers.3.2.to_q.weight", "transformer.layers.3.2.to_k.weight", "transformer.layers.3.2.to_v.weight", "transformer.layers.3.2.to_out.0.weight", "transformer.layers.3.4.ff.0.0.weight", "transformer.layers.3.4.ff.2.weight", "transformer.layers.4.2.to_q.weight", "transformer.layers.4.2.to_k.weight", "transformer.layers.4.2.to_v.weight", "transformer.layers.4.2.to_out.0.weight", "transformer.layers.4.4.ff.0.0.weight", "transformer.layers.4.4.ff.2.weight", "transformer.layers.5.2.to_q.weight", "transformer.layers.5.2.to_k.weight", "transformer.layers.5.2.to_v.weight", "transformer.layers.5.2.to_out.0.weight", "transformer.layers.5.4.ff.0.0.weight", "transformer.layers.5.4.ff.2.weight", "transformer.layers.6.2.to_q.weight", "transformer.layers.6.2.to_k.weight", "transformer.layers.6.2.to_v.weight", "transformer.layers.6.2.to_out.0.weight", "transformer.layers.6.4.ff.0.0.weight", "transformer.layers.6.4.ff.2.weight", "transformer.layers.7.2.to_q.weight", "transformer.layers.7.2.to_k.weight", "transformer.layers.7.2.to_v.weight", "transformer.layers.7.2.to_out.0.weight", "transformer.layers.7.4.ff.0.0.weight", "transformer.layers.7.4.ff.2.weight", "transformer.layers.8.4.ff.0.0.weight", "transformer.layers.8.4.ff.2.weight", "transformer.layers.9.4.ff.0.0.weight", "transformer.layers.9.4.ff.2.weight", "transformer.layers.10.4.ff.0.0.weight", "transformer.layers.10.4.ff.2.weight", "transformer.layers.11.4.ff.0.0.weight", "transformer.layers.11.4.ff.2.weight", "transformer.layers.12.4.ff.0.0.weight", "transformer.layers.12.4.ff.2.weight", "transformer.layers.13.0.weight", "transformer.layers.13.4.ff.0.0.weight", "transformer.layers.13.4.ff.2.weight", "transformer.layers.14.0.weight", "transformer.layers.14.4.ff.0.0.weight", "transformer.layers.14.4.ff.2.weight", "transformer.layers.15.0.weight", "transformer.layers.15.4.ff.0.0.weight", "transformer.layers.15.4.ff.2.weight", "transformer.layers.16.4.ff.0.0.weight", "transformer.layers.16.4.ff.2.weight", "transformer.layers.17.4.ff.0.0.weight", "transformer.layers.17.4.ff.2.weight", "transformer.layers.18.4.ff.0.0.weight", "transformer.layers.18.4.ff.2.weight", "transformer.layers.19.4.ff.0.0.weight", "transformer.layers.19.4.ff.2.weight", "transformer.layers.20.4.ff.0.0.weight", "transformer.layers.20.4.ff.2.weight", "transformer.layers.21.4.ff.0.0.weight", "transformer.layers.21.4.ff.2.weight", "transformer.layers.22.4.ff.0.0.weight", "transformer.layers.22.4.ff.2.weight", "transformer.layers.23.4.ff.0.0.weight", "transformer.layers.23.4.ff.2.weight", "transformer.layers.24.4.ff.0.0.weight", "transformer.layers.24.4.ff.2.weight", "transformer.layers.25.4.ff.0.0.weight", "transformer.layers.25.4.ff.2.weight" ] }, "total_parameters": 391, "total_elements": 360755948, "param_ranges": { "transformer.time_embed.time_mlp.0.weight": { "min": -0.4304574429988861, "max": 0.2989666759967804, "mean": -0.0025583612732589245, "std": 0.042551927268505096, "sparsity": 0.0, "shape": [ 1024, 256 ] }, "transformer.time_embed.time_mlp.0.bias": { "min": -0.06317814439535141, "max": 0.10763632506132126, "mean": 0.0005897035007365048, "std": 0.03411067649722099, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.time_embed.time_mlp.2.weight": { "min": -0.4125778377056122, "max": 0.8363006114959717, "mean": -0.00021047875634394586, "std": 0.024107400327920914, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.time_embed.time_mlp.2.bias": { "min": -0.1154782623052597, "max": 0.32146546244621277, "mean": -0.0009399052942171693, "std": 0.019577190279960632, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.text_embed.text_embed.weight": { "min": -2.7917730808258057, "max": 2.8704917430877686, "mean": -0.0003648003621492535, "std": 0.6153737306594849, "sparsity": 0.0, "shape": [ 2546, 100 ] }, "transformer.input_embed.proj.weight": { "min": -0.27894294261932373, "max": 0.38190174102783203, "mean": 0.00042033716454170644, "std": 0.042750339955091476, "sparsity": 0.0, "shape": [ 1024, 300 ] }, "transformer.input_embed.proj.bias": { "min": -0.2222987860441208, "max": 0.20967179536819458, "mean": -0.00449405936524272, "std": 0.04091016948223114, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.input_embed.conv_pos_embed.conv1d.0.weight": { "min": -0.4279242753982544, "max": 0.47530120611190796, "mean": 2.540943796702777e-06, "std": 0.024509120732545853, "sparsity": 0.0, "shape": [ 1024, 64, 31 ] }, "transformer.input_embed.conv_pos_embed.conv1d.0.bias": { "min": -0.32545235753059387, "max": 0.15698140859603882, "mean": -0.0467013455927372, "std": 0.051578979939222336, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.input_embed.conv_pos_embed.conv1d.2.weight": { "min": -0.41039708256721497, "max": 0.3545180857181549, "mean": -0.00012633543519768864, "std": 0.023601215332746506, "sparsity": 0.0, "shape": [ 1024, 64, 31 ] }, "transformer.input_embed.conv_pos_embed.conv1d.2.bias": { "min": -0.2297646850347519, "max": 0.26262199878692627, "mean": -0.029148615896701813, "std": 0.049347542226314545, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.0.1.g": { "min": 0.2546185553073883, "max": 0.8200821876525879, "mean": 0.5254418849945068, "std": 0.08080805093050003, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.0.2.to_q.weight": { "min": -0.29693102836608887, "max": 0.26530489325523376, "mean": -0.00042408728040754795, "std": 0.032104212790727615, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.0.2.to_q.bias": { "min": -0.09274528920650482, "max": 0.12482056021690369, "mean": 0.0006486810743808746, "std": 0.025742707774043083, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.0.2.to_k.weight": { "min": -0.29047587513923645, "max": 0.28141430020332336, "mean": -7.6991505920887e-05, "std": 0.03093625046312809, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.0.2.to_k.bias": { "min": -5.899471282958984, "max": 5.8142476081848145, "mean": -0.009332108311355114, "std": 1.2954597473144531, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.0.2.to_v.weight": { "min": -0.42482444643974304, "max": 0.34377753734588623, "mean": 9.762628906173632e-05, "std": 0.02995302341878414, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.0.2.to_v.bias": { "min": -0.028968220576643944, "max": 0.027649197727441788, "mean": -0.0003115592699032277, "std": 0.012572345323860645, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.0.2.to_out.0.weight": { "min": -0.45394477248191833, "max": 0.44869503378868103, "mean": 2.2737156541552395e-05, "std": 0.023855075240135193, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.0.2.to_out.0.bias": { "min": -0.08868509531021118, "max": 0.0911499559879303, "mean": 0.002273137215524912, "std": 0.019512129947543144, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.0.3.g": { "min": 0.2666190564632416, "max": 1.0562766790390015, "mean": 0.531130313873291, "std": 0.1044141948223114, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.0.4.ff.0.0.weight": { "min": -0.5744591355323792, "max": 0.6083897948265076, "mean": -0.00043104952783323824, "std": 0.03859502077102661, "sparsity": 0.0, "shape": [ 4096, 1024 ] }, "transformer.layers.0.4.ff.0.0.bias": { "min": -0.1818137913942337, "max": 0.045760128647089005, "mean": -0.029441693797707558, "std": 0.042590487748384476, "sparsity": 0.0, "shape": [ 4096 ] }, "transformer.layers.0.4.ff.2.weight": { "min": -1.166682481765747, "max": 1.634623646736145, "mean": 0.0003185438981745392, "std": 0.02769385650753975, "sparsity": 0.0, "shape": [ 1024, 4096 ] }, "transformer.layers.0.4.ff.2.bias": { "min": -0.16253960132598877, "max": 0.2057240754365921, "mean": -0.021116681396961212, "std": 0.027940358966588974, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.1.1.g": { "min": 0.2244873046875, "max": 0.8436590433120728, "mean": 0.48752978444099426, "std": 0.07519952952861786, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.1.2.to_q.weight": { "min": -0.25530415773391724, "max": 0.3058406710624695, "mean": -9.383336873725057e-06, "std": 0.03347048535943031, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.1.2.to_q.bias": { "min": -0.09549209475517273, "max": 0.11042480170726776, "mean": 5.650718230754137e-05, "std": 0.02698545530438423, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.1.2.to_k.weight": { "min": -0.2974618077278137, "max": 0.295981764793396, "mean": 5.020356547902338e-05, "std": 0.03253836929798126, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.1.2.to_k.bias": { "min": -5.164300918579102, "max": 5.084524154663086, "mean": -0.0145945493131876, "std": 1.1573816537857056, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.1.2.to_v.weight": { "min": -0.3448942005634308, "max": 0.3434945046901703, "mean": 7.886815001256764e-05, "std": 0.030058231204748154, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.1.2.to_v.bias": { "min": -0.036158282309770584, "max": 0.03324951231479645, "mean": -0.00014386117982212454, "std": 0.013023010455071926, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.1.2.to_out.0.weight": { "min": -0.31528618931770325, "max": 0.3752082884311676, "mean": -2.1654177544405684e-05, "std": 0.024055516347289085, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.1.2.to_out.0.bias": { "min": -0.10527443885803223, "max": 0.12188493460416794, "mean": -0.001954286126419902, "std": 0.0288428645581007, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.1.3.g": { "min": 0.31180328130722046, "max": 1.120958685874939, "mean": 0.6662410497665405, "std": 0.09774944931268692, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.1.4.ff.0.0.weight": { "min": -0.872490644454956, "max": 0.627565324306488, "mean": 0.0016757093835622072, "std": 0.047438349574804306, "sparsity": 0.0, "shape": [ 4096, 1024 ] }, "transformer.layers.1.4.ff.0.0.bias": { "min": -0.27100008726119995, "max": 0.03407798707485199, "mean": -0.04660271108150482, "std": 0.04059542715549469, "sparsity": 0.0, "shape": [ 4096 ] }, "transformer.layers.1.4.ff.2.weight": { "min": -0.9202945232391357, "max": 0.9643993973731995, "mean": 0.0010207913583144546, "std": 0.04070187732577324, "sparsity": 0.0, "shape": [ 1024, 4096 ] }, "transformer.layers.1.4.ff.2.bias": { "min": -0.14455102384090424, "max": 0.07482050359249115, "mean": -0.009084243327379227, "std": 0.025694938376545906, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.2.1.g": { "min": 0.23976297676563263, "max": 0.7124081254005432, "mean": 0.4472041726112366, "std": 0.05932378023862839, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.2.2.to_q.weight": { "min": -0.27300503849983215, "max": 0.297477126121521, "mean": 8.662666004966013e-06, "std": 0.035474397242069244, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.2.2.to_q.bias": { "min": -0.11903306841850281, "max": 0.11846816539764404, "mean": 0.0007502126973122358, "std": 0.02760804258286953, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.2.2.to_k.weight": { "min": -0.28101953864097595, "max": 0.27942612767219543, "mean": -7.648450991837308e-05, "std": 0.03510245680809021, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.2.2.to_k.bias": { "min": -2.5096001625061035, "max": 2.5215961933135986, "mean": 0.026745397597551346, "std": 0.586780309677124, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.2.2.to_v.weight": { "min": -0.22110240161418915, "max": 0.27161508798599243, "mean": 2.438401679683011e-06, "std": 0.030731581151485443, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.2.2.to_v.bias": { "min": -0.033151235431432724, "max": 0.031146494671702385, "mean": 0.00011706411896739155, "std": 0.012394252233207226, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.2.2.to_out.0.weight": { "min": -0.23539645969867706, "max": 0.23185278475284576, "mean": 5.7256078434875235e-05, "std": 0.025697633624076843, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.2.2.to_out.0.bias": { "min": -0.13603141903877258, "max": 0.1280086189508438, "mean": -0.005497735925018787, "std": 0.03996264934539795, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.2.3.g": { "min": 0.3547299802303314, "max": 1.1723523139953613, "mean": 0.7105399370193481, "std": 0.10377444326877594, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.2.4.ff.0.0.weight": { "min": -0.6173876523971558, "max": 0.5556272268295288, "mean": 0.001160334562882781, "std": 0.046114034950733185, "sparsity": 0.0, "shape": [ 4096, 1024 ] }, "transformer.layers.2.4.ff.0.0.bias": { "min": -0.18945953249931335, "max": 0.024937259033322334, "mean": -0.034846723079681396, "std": 0.028622858226299286, "sparsity": 0.0, "shape": [ 4096 ] }, "transformer.layers.2.4.ff.2.weight": { "min": -1.1309547424316406, "max": 0.97038733959198, "mean": 0.00035909086000174284, "std": 0.04234256222844124, "sparsity": 0.0, "shape": [ 1024, 4096 ] }, "transformer.layers.2.4.ff.2.bias": { "min": -0.5978560447692871, "max": 0.06273925304412842, "mean": -0.0048814816400408745, "std": 0.028621360659599304, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.3.1.g": { "min": 0.3753381073474884, "max": 0.9404851794242859, "mean": 0.592466413974762, "std": 0.06694933772087097, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.3.2.to_q.weight": { "min": -0.3917763829231262, "max": 0.36936038732528687, "mean": 7.001425547059625e-05, "std": 0.0371866449713707, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.3.2.to_q.bias": { "min": -0.11900075525045395, "max": 0.13653883337974548, "mean": 0.0009160788613371551, "std": 0.029187612235546112, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.3.2.to_k.weight": { "min": -0.6190802454948425, "max": 0.508792519569397, "mean": 1.5223037735268008e-05, "std": 0.036439377814531326, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.3.2.to_k.bias": { "min": -8.18681526184082, "max": 8.788924217224121, "mean": -0.10927566885948181, "std": 1.6988582611083984, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.3.2.to_v.weight": { "min": -0.27652865648269653, "max": 0.2397209107875824, "mean": 5.228666486800648e-05, "std": 0.03261314332485199, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.3.2.to_v.bias": { "min": -0.0514988899230957, "max": 0.03946297615766525, "mean": 9.359161776956171e-05, "std": 0.012969369068741798, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.3.2.to_out.0.weight": { "min": -0.23075971007347107, "max": 0.23487111926078796, "mean": -2.203527037636377e-05, "std": 0.029389776289463043, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.3.2.to_out.0.bias": { "min": -0.20423616468906403, "max": 0.1052512601017952, "mean": -0.004020487889647484, "std": 0.03263992816209793, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.3.3.g": { "min": 0.33965712785720825, "max": 1.012444019317627, "mean": 0.7007054090499878, "std": 0.09675901383161545, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.3.4.ff.0.0.weight": { "min": -0.5647616982460022, "max": 0.8335906267166138, "mean": 0.0004150677123107016, "std": 0.04229460284113884, "sparsity": 0.0, "shape": [ 4096, 1024 ] }, "transformer.layers.3.4.ff.0.0.bias": { "min": -0.21212875843048096, "max": 0.029963094741106033, "mean": -0.03217349201440811, "std": 0.026498712599277496, "sparsity": 0.0, "shape": [ 4096 ] }, "transformer.layers.3.4.ff.2.weight": { "min": -0.7548851370811462, "max": 0.719126284122467, "mean": -1.581827746122144e-05, "std": 0.036835212260484695, "sparsity": 0.0, "shape": [ 1024, 4096 ] }, "transformer.layers.3.4.ff.2.bias": { "min": -0.2634251117706299, "max": 0.1063019409775734, "mean": -0.0030143139883875847, "std": 0.028873277828097343, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.4.1.g": { "min": 0.28394702076911926, "max": 0.6950414180755615, "mean": 0.4993884563446045, "std": 0.04653454199433327, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.4.2.to_q.weight": { "min": -0.2782432436943054, "max": 0.2338251918554306, "mean": -0.00011091169290011749, "std": 0.03875752165913582, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.4.2.to_q.bias": { "min": -0.15358327329158783, "max": 0.12643983960151672, "mean": -0.0022276192903518677, "std": 0.033326249569654465, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.4.2.to_k.weight": { "min": -0.41438740491867065, "max": 0.6594708561897278, "mean": -1.851528577390127e-05, "std": 0.039096731692552567, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.4.2.to_k.bias": { "min": -4.237917423248291, "max": 4.722480773925781, "mean": -0.020456865429878235, "std": 1.0076923370361328, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.4.2.to_v.weight": { "min": -0.245052769780159, "max": 0.20759740471839905, "mean": 4.428692045621574e-05, "std": 0.0339626781642437, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.4.2.to_v.bias": { "min": -0.034463901072740555, "max": 0.04485860466957092, "mean": -2.209081139881164e-05, "std": 0.012639513239264488, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.4.2.to_out.0.weight": { "min": -0.2011018991470337, "max": 0.20644338428974152, "mean": -2.9357790481299162e-05, "std": 0.03102092258632183, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.4.2.to_out.0.bias": { "min": -0.19982746243476868, "max": 0.11318917572498322, "mean": -0.0028952043503522873, "std": 0.03453591465950012, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.4.3.g": { "min": 0.36675214767456055, "max": 1.0576648712158203, "mean": 0.6704948544502258, "std": 0.06640778481960297, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.4.4.ff.0.0.weight": { "min": -0.39844217896461487, "max": 0.5021068453788757, "mean": -3.8750327803427354e-05, "std": 0.04113020375370979, "sparsity": 0.0, "shape": [ 4096, 1024 ] }, "transformer.layers.4.4.ff.0.0.bias": { "min": -0.12863779067993164, "max": 0.026958497241139412, "mean": -0.030533233657479286, "std": 0.02188229374587536, "sparsity": 0.0, "shape": [ 4096 ] }, "transformer.layers.4.4.ff.2.weight": { "min": -0.449487566947937, "max": 0.43325698375701904, "mean": 7.53812346374616e-05, "std": 0.03489059582352638, "sparsity": 0.0, "shape": [ 1024, 4096 ] }, "transformer.layers.4.4.ff.2.bias": { "min": -0.2675015926361084, "max": 0.07307843118906021, "mean": -0.0010904058581218123, "std": 0.02313595451414585, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.5.1.g": { "min": 0.28754422068595886, "max": 0.6852768659591675, "mean": 0.5245310068130493, "std": 0.04753505066037178, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.5.2.to_q.weight": { "min": -0.22266238927841187, "max": 0.22331833839416504, "mean": 1.5918290955596603e-05, "std": 0.038949232548475266, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.5.2.to_q.bias": { "min": -0.13635052740573883, "max": 0.10933808237314224, "mean": 0.00024784280685707927, "std": 0.029207777231931686, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.5.2.to_k.weight": { "min": -0.37493839859962463, "max": 0.43759685754776, "mean": -9.403542208019644e-06, "std": 0.03928738459944725, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.5.2.to_k.bias": { "min": -3.8458573818206787, "max": 4.999326705932617, "mean": 0.009741819463670254, "std": 0.8452204465866089, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.5.2.to_v.weight": { "min": -0.22270528972148895, "max": 0.22029587626457214, "mean": -3.1911031328490935e-07, "std": 0.034410301595926285, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.5.2.to_v.bias": { "min": -0.043785978108644485, "max": 0.03592836111783981, "mean": -0.0002596271806396544, "std": 0.012078739702701569, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.5.2.to_out.0.weight": { "min": -0.21270592510700226, "max": 0.18842868506908417, "mean": -1.7000973457470536e-05, "std": 0.03153671696782112, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.5.2.to_out.0.bias": { "min": -0.1809375286102295, "max": 0.12074985355138779, "mean": -0.002395304851233959, "std": 0.04127994924783707, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.5.3.g": { "min": 0.422917902469635, "max": 0.9417884349822998, "mean": 0.6626536250114441, "std": 0.05681688338518143, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.5.4.ff.0.0.weight": { "min": -0.3708776533603668, "max": 0.4765470623970032, "mean": -8.20929926703684e-05, "std": 0.04088940471410751, "sparsity": 0.0, "shape": [ 4096, 1024 ] }, "transformer.layers.5.4.ff.0.0.bias": { "min": -0.20849654078483582, "max": 0.0273736622184515, "mean": -0.03023475781083107, "std": 0.021363815292716026, "sparsity": 0.0, "shape": [ 4096 ] }, "transformer.layers.5.4.ff.2.weight": { "min": -0.3406715989112854, "max": 0.7341561913490295, "mean": 8.243846968980506e-05, "std": 0.03476623818278313, "sparsity": 0.0, "shape": [ 1024, 4096 ] }, "transformer.layers.5.4.ff.2.bias": { "min": -0.24016188085079193, "max": 0.05046152323484421, "mean": -0.0011865879641845822, "std": 0.020459504798054695, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.6.1.g": { "min": 0.30588385462760925, "max": 0.6534701585769653, "mean": 0.5251248478889465, "std": 0.04612228646874428, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.6.2.to_q.weight": { "min": -0.30431559681892395, "max": 0.21719232201576233, "mean": 6.998516619205475e-05, "std": 0.039497170597314835, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.6.2.to_q.bias": { "min": -0.14912384748458862, "max": 0.13098323345184326, "mean": 0.0003266759740654379, "std": 0.03045588731765747, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.6.2.to_k.weight": { "min": -0.25694772601127625, "max": 0.201896533370018, "mean": 3.129036849713884e-05, "std": 0.0394882932305336, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.6.2.to_k.bias": { "min": -2.336271047592163, "max": 2.375894784927368, "mean": -0.026241114363074303, "std": 0.44977155327796936, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.6.2.to_v.weight": { "min": -0.18857863545417786, "max": 0.21028850972652435, "mean": 3.711117460625246e-05, "std": 0.034793779253959656, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.6.2.to_v.bias": { "min": -0.03168531507253647, "max": 0.03566686809062958, "mean": -0.00019767877529375255, "std": 0.012288626283407211, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.6.2.to_out.0.weight": { "min": -0.18829987943172455, "max": 0.17024517059326172, "mean": -6.836466491222382e-05, "std": 0.03217046335339546, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.6.2.to_out.0.bias": { "min": -0.13942238688468933, "max": 0.1372329592704773, "mean": -0.002514950931072235, "std": 0.05129847675561905, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.6.3.g": { "min": 0.4670739769935608, "max": 0.955595850944519, "mean": 0.6688634157180786, "std": 0.05277201533317566, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.6.4.ff.0.0.weight": { "min": -0.3244037926197052, "max": 0.309257835149765, "mean": -1.045628778229002e-06, "std": 0.04094540327787399, "sparsity": 0.0, "shape": [ 4096, 1024 ] }, "transformer.layers.6.4.ff.0.0.bias": { "min": -0.1248614490032196, "max": 0.025666970759630203, "mean": -0.030689720064401627, "std": 0.019823001697659492, "sparsity": 0.0, "shape": [ 4096 ] }, "transformer.layers.6.4.ff.2.weight": { "min": -0.43948638439178467, "max": 0.44534069299697876, "mean": 9.591381240170449e-05, "std": 0.035119153559207916, "sparsity": 0.0, "shape": [ 1024, 4096 ] }, "transformer.layers.6.4.ff.2.bias": { "min": -0.2246266007423401, "max": 0.051820773631334305, "mean": -0.0011818428756669164, "std": 0.018466750159859657, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.7.1.g": { "min": 0.33914706110954285, "max": 0.7398536205291748, "mean": 0.5587007999420166, "std": 0.04139573872089386, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.7.2.to_q.weight": { "min": -0.2729904353618622, "max": 0.27884039282798767, "mean": 2.0351768398541026e-05, "std": 0.04105766862630844, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.7.2.to_q.bias": { "min": -0.13680818676948547, "max": 0.13977055251598358, "mean": 0.0004918644553981721, "std": 0.02663181535899639, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.7.2.to_k.weight": { "min": -0.49051523208618164, "max": 0.35575586557388306, "mean": 8.911330223781988e-05, "std": 0.04069535806775093, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.7.2.to_k.bias": { "min": -2.2970781326293945, "max": 1.745163917541504, "mean": -0.021079789847135544, "std": 0.500128984451294, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.7.2.to_v.weight": { "min": -0.2181096374988556, "max": 0.1974443644285202, "mean": -4.0170674765249714e-05, "std": 0.03423338383436203, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.7.2.to_v.bias": { "min": -0.041142482310533524, "max": 0.03885917738080025, "mean": -0.0001360031747026369, "std": 0.012883774936199188, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.7.2.to_out.0.weight": { "min": -0.17761866748332977, "max": 0.1828862875699997, "mean": 4.801471368409693e-05, "std": 0.03155674412846565, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.7.2.to_out.0.bias": { "min": -0.1799207329750061, "max": 0.18389682471752167, "mean": -0.0022146617993712425, "std": 0.05482979863882065, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.7.3.g": { "min": 0.474190354347229, "max": 1.0258487462997437, "mean": 0.6452326774597168, "std": 0.05035318806767464, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.7.4.ff.0.0.weight": { "min": -0.27163514494895935, "max": 0.3091295659542084, "mean": 0.00011244519555475563, "std": 0.04068158566951752, "sparsity": 0.0, "shape": [ 4096, 1024 ] }, "transformer.layers.7.4.ff.0.0.bias": { "min": -0.10526852309703827, "max": 0.026741184294223785, "mean": -0.029519207775592804, "std": 0.01793486438691616, "sparsity": 0.0, "shape": [ 4096 ] }, "transformer.layers.7.4.ff.2.weight": { "min": -0.33932313323020935, "max": 0.329169899225235, "mean": 5.2667885029222816e-05, "std": 0.03441279008984566, "sparsity": 0.0, "shape": [ 1024, 4096 ] }, "transformer.layers.7.4.ff.2.bias": { "min": -0.18180307745933533, "max": 0.042509548366069794, "mean": -0.0010597179643809795, "std": 0.017209293320775032, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.8.1.g": { "min": 0.32517459988594055, "max": 0.6865665912628174, "mean": 0.511164128780365, "std": 0.03695276752114296, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.8.2.to_q.weight": { "min": -0.23393133282661438, "max": 0.2253761738538742, "mean": -3.613880107877776e-05, "std": 0.039175428450107574, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.8.2.to_q.bias": { "min": -0.11511484533548355, "max": 0.13181191682815552, "mean": 0.00015029555652290583, "std": 0.029160132631659508, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.8.2.to_k.weight": { "min": -0.35229772329330444, "max": 0.28487107157707214, "mean": 6.5603690018178895e-06, "std": 0.03924452140927315, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.8.2.to_k.bias": { "min": -4.132349967956543, "max": 3.543774366378784, "mean": -0.011590607464313507, "std": 0.6826151609420776, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.8.2.to_v.weight": { "min": -0.21073584258556366, "max": 0.20936711132526398, "mean": 3.4690663596848026e-05, "std": 0.03448447957634926, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.8.2.to_v.bias": { "min": -0.03585724160075188, "max": 0.047966208308935165, "mean": 0.0007884915685281157, "std": 0.012871142476797104, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.8.2.to_out.0.weight": { "min": -0.21028311550617218, "max": 0.19305972754955292, "mean": -9.823215805226937e-07, "std": 0.031695324927568436, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.8.2.to_out.0.bias": { "min": -0.1864088624715805, "max": 0.17721442878246307, "mean": -0.0028417375870049, "std": 0.058615218847990036, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.8.3.g": { "min": 0.47462186217308044, "max": 1.0414687395095825, "mean": 0.651329517364502, "std": 0.049656689167022705, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.8.4.ff.0.0.weight": { "min": -0.24834677577018738, "max": 0.3290989398956299, "mean": 0.00018076221749652177, "std": 0.04056994616985321, "sparsity": 0.0, "shape": [ 4096, 1024 ] }, "transformer.layers.8.4.ff.0.0.bias": { "min": -0.12541481852531433, "max": 0.024957137182354927, "mean": -0.030498644337058067, "std": 0.017614001408219337, "sparsity": 0.0, "shape": [ 4096 ] }, "transformer.layers.8.4.ff.2.weight": { "min": -0.4203978180885315, "max": 0.4814401865005493, "mean": 1.1958536560996436e-06, "std": 0.03539701923727989, "sparsity": 0.0, "shape": [ 1024, 4096 ] }, "transformer.layers.8.4.ff.2.bias": { "min": -0.15133719146251678, "max": 0.04343123733997345, "mean": 4.256972897564992e-05, "std": 0.014886128716170788, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.9.1.g": { "min": 0.31556373834609985, "max": 0.6816186308860779, "mean": 0.5528932809829712, "std": 0.04069383069872856, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.9.2.to_q.weight": { "min": -0.20636020600795746, "max": 0.21985411643981934, "mean": 3.188779010088183e-05, "std": 0.03829942271113396, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.9.2.to_q.bias": { "min": -0.13772568106651306, "max": 0.1125853881239891, "mean": 2.6155808882322162e-05, "std": 0.025809435173869133, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.9.2.to_k.weight": { "min": -0.40282922983169556, "max": 0.37083154916763306, "mean": 2.5528193873469718e-05, "std": 0.03817952424287796, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.9.2.to_k.bias": { "min": -3.7708845138549805, "max": 2.868703603744507, "mean": 0.0011554225347936153, "std": 0.5168288946151733, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.9.2.to_v.weight": { "min": -0.20372195541858673, "max": 0.1975945085287094, "mean": 2.9724978958256543e-05, "std": 0.03429732471704483, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.9.2.to_v.bias": { "min": -0.0505308173596859, "max": 0.039880186319351196, "mean": -0.0004213028587400913, "std": 0.01341495756059885, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.9.2.to_out.0.weight": { "min": -0.19602739810943604, "max": 0.20172414183616638, "mean": -1.2448943380150013e-05, "std": 0.031805410981178284, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.9.2.to_out.0.bias": { "min": -0.19294138252735138, "max": 0.19508768618106842, "mean": -0.0029671685770154, "std": 0.06252522766590118, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.9.3.g": { "min": 0.348909467458725, "max": 1.083768367767334, "mean": 0.667101263999939, "std": 0.055243175476789474, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.9.4.ff.0.0.weight": { "min": -0.22561651468276978, "max": 0.2514271140098572, "mean": 0.0003585518861655146, "std": 0.04075947403907776, "sparsity": 0.0, "shape": [ 4096, 1024 ] }, "transformer.layers.9.4.ff.0.0.bias": { "min": -0.09107004851102829, "max": 0.04363898187875748, "mean": -0.03007982112467289, "std": 0.017611678689718246, "sparsity": 0.0, "shape": [ 4096 ] }, "transformer.layers.9.4.ff.2.weight": { "min": -0.353363573551178, "max": 0.3039560914039612, "mean": -4.4702926970785484e-05, "std": 0.037122584879398346, "sparsity": 0.0, "shape": [ 1024, 4096 ] }, "transformer.layers.9.4.ff.2.bias": { "min": -0.16167114675045013, "max": 0.06346774101257324, "mean": -7.894223381299525e-05, "std": 0.019427189603447914, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.10.1.g": { "min": 0.34871092438697815, "max": 0.7219411134719849, "mean": 0.5423486828804016, "std": 0.03906320407986641, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.10.2.to_q.weight": { "min": -0.219291090965271, "max": 0.22339218854904175, "mean": -1.1523573448357638e-05, "std": 0.03923090174794197, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.10.2.to_q.bias": { "min": -0.118381567299366, "max": 0.17055465281009674, "mean": 0.00028248116723261774, "std": 0.025117389857769012, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.10.2.to_k.weight": { "min": -0.24647831916809082, "max": 0.30066463351249695, "mean": -3.701161767821759e-05, "std": 0.03893034905195236, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.10.2.to_k.bias": { "min": -3.5050106048583984, "max": 3.714456796646118, "mean": 0.015847081318497658, "std": 0.7823866009712219, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.10.2.to_v.weight": { "min": -0.2191196233034134, "max": 0.2373991161584854, "mean": -1.3136124835000373e-05, "std": 0.03630338981747627, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.10.2.to_v.bias": { "min": -0.04720474034547806, "max": 0.051363855600357056, "mean": 0.00048070820048451424, "std": 0.013523152098059654, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.10.2.to_out.0.weight": { "min": -0.21417121589183807, "max": 0.21722286939620972, "mean": 5.63644825888332e-05, "std": 0.0336158350110054, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.10.2.to_out.0.bias": { "min": -0.21132777631282806, "max": 0.2312006652355194, "mean": -0.0050989487208426, "std": 0.06185900419950485, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.10.3.g": { "min": 0.36193206906318665, "max": 1.1010645627975464, "mean": 0.6992560029029846, "std": 0.05359357222914696, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.10.4.ff.0.0.weight": { "min": -0.2351117730140686, "max": 0.24475757777690887, "mean": 0.00046337785897776484, "std": 0.041268885135650635, "sparsity": 0.0, "shape": [ 4096, 1024 ] }, "transformer.layers.10.4.ff.0.0.bias": { "min": -0.09809085726737976, "max": 0.06809623539447784, "mean": -0.0314301960170269, "std": 0.018128085881471634, "sparsity": 0.0, "shape": [ 4096 ] }, "transformer.layers.10.4.ff.2.weight": { "min": -0.30171892046928406, "max": 0.35163986682891846, "mean": -8.267226803582162e-05, "std": 0.04027453064918518, "sparsity": 0.0, "shape": [ 1024, 4096 ] }, "transformer.layers.10.4.ff.2.bias": { "min": -0.1522630751132965, "max": 0.14965395629405975, "mean": 0.0002633024996612221, "std": 0.023038938641548157, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.11.1.g": { "min": 0.9992594122886658, "max": 1.0015419721603394, "mean": 1.0000762939453125, "std": 0.0006376681849360466, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.11.2.to_q.weight": { "min": -0.03125917166471481, "max": 0.03125542029738426, "mean": -1.929077916429378e-05, "std": 0.018040984869003296, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.11.2.to_q.bias": { "min": -0.031228363513946533, "max": 0.030987966805696487, "mean": -0.0010841633193194866, "std": 0.017950600013136864, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.11.2.to_k.weight": { "min": -0.03125608712434769, "max": 0.03125986456871033, "mean": 3.548163931554882e-06, "std": 0.018041392788290977, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.11.2.to_k.bias": { "min": -0.03115428239107132, "max": 0.031174642965197563, "mean": 0.00033392058685421944, "std": 0.01806280016899109, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.11.2.to_v.weight": { "min": 0.0, "max": 0.0, "mean": 0.0, "std": 0.0, "sparsity": 1.0, "shape": [ 1024, 1024 ] }, "transformer.layers.11.2.to_v.bias": { "min": 0.0, "max": 0.0, "mean": 0.0, "std": 0.0, "sparsity": 1.0, "shape": [ 1024 ] }, "transformer.layers.11.2.to_out.0.weight": { "min": 0.0, "max": 0.0, "mean": 0.0, "std": 0.0, "sparsity": 1.0, "shape": [ 1024, 1024 ] }, "transformer.layers.11.2.to_out.0.bias": { "min": -0.0006233988679014146, "max": 0.0007061311043798923, "mean": 4.538033408607589e-06, "std": 0.0001893796434160322, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.11.3.g": { "min": 0.997599720954895, "max": 1.002988576889038, "mean": 0.9999969601631165, "std": 0.000850954616907984, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.11.4.ff.0.0.weight": { "min": -0.033545054495334625, "max": 0.033692505210638046, "mean": -6.091411705710925e-06, "std": 0.018047811463475227, "sparsity": 0.0, "shape": [ 4096, 1024 ] }, "transformer.layers.11.4.ff.0.0.bias": { "min": -0.033063653856515884, "max": 0.033412136137485504, "mean": -0.00018106887000612915, "std": 0.017954090610146523, "sparsity": 0.0, "shape": [ 4096 ] }, "transformer.layers.11.4.ff.2.weight": { "min": -0.001468250178731978, "max": 0.0015634398441761732, "mean": 1.9080666788795497e-06, "std": 0.00028948785620741546, "sparsity": 0.0, "shape": [ 1024, 4096 ] }, "transformer.layers.11.4.ff.2.bias": { "min": -0.0005752606084570289, "max": 0.0007690406637266278, "mean": 7.6006986091670115e-06, "std": 0.00017151834617834538, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.12.1.g": { "min": 0.3833079934120178, "max": 0.7191449403762817, "mean": 0.5806841254234314, "std": 0.03885476291179657, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.12.2.to_q.weight": { "min": -0.23893281817436218, "max": 0.19658899307250977, "mean": 2.609232979011722e-05, "std": 0.03746626526117325, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.12.2.to_q.bias": { "min": -0.11880965530872345, "max": 0.1667701154947281, "mean": 0.000981115852482617, "std": 0.02755648083984852, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.12.2.to_k.weight": { "min": -0.2465641349554062, "max": 0.49993160367012024, "mean": -5.0439630285836756e-05, "std": 0.03762364014983177, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.12.2.to_k.bias": { "min": -3.9418535232543945, "max": 3.7689952850341797, "mean": -0.003572138026356697, "std": 0.6813418865203857, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.12.2.to_v.weight": { "min": -0.2274625599384308, "max": 0.25183549523353577, "mean": -1.1858754987770226e-05, "std": 0.03743482381105423, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.12.2.to_v.bias": { "min": -0.07157625257968903, "max": 0.08059139549732208, "mean": -0.0005097019020467997, "std": 0.0156550370156765, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.12.2.to_out.0.weight": { "min": -0.22814570367336273, "max": 0.2576799690723419, "mean": -2.8758266125805676e-05, "std": 0.03542165458202362, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.12.2.to_out.0.bias": { "min": -0.20052188634872437, "max": 0.21483485400676727, "mean": -0.0055272276513278484, "std": 0.06832942366600037, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.12.3.g": { "min": 0.40502721071243286, "max": 1.189380407333374, "mean": 0.7378897666931152, "std": 0.05522923544049263, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.12.4.ff.0.0.weight": { "min": -0.22088685631752014, "max": 0.2456110566854477, "mean": 0.0005211912211962044, "std": 0.04133584350347519, "sparsity": 0.0, "shape": [ 4096, 1024 ] }, "transformer.layers.12.4.ff.0.0.bias": { "min": -0.10322928428649902, "max": 0.024186961352825165, "mean": -0.03266708552837372, "std": 0.018890798091888428, "sparsity": 0.0, "shape": [ 4096 ] }, "transformer.layers.12.4.ff.2.weight": { "min": -0.44966569542884827, "max": 0.42246878147125244, "mean": -0.00043506931979209185, "std": 0.04689610004425049, "sparsity": 0.0, "shape": [ 1024, 4096 ] }, "transformer.layers.12.4.ff.2.bias": { "min": -0.2515268921852112, "max": 0.47013524174690247, "mean": 0.003204584587365389, "std": 0.04452726989984512, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.13.0.weight": { "min": -0.31688186526298523, "max": 0.33314481377601624, "mean": -2.5167657440761104e-05, "std": 0.02128784917294979, "sparsity": 0.0, "shape": [ 1024, 2048 ] }, "transformer.layers.13.1.g": { "min": 0.3244757652282715, "max": 0.6856456398963928, "mean": 0.5710105299949646, "std": 0.044706691056489944, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.13.2.to_q.weight": { "min": -0.16456718742847443, "max": 0.17448973655700684, "mean": -4.871570490649901e-05, "std": 0.03318251296877861, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.13.2.to_q.bias": { "min": -0.18692979216575623, "max": 0.14325818419456482, "mean": 3.459470462985337e-05, "std": 0.029701216146349907, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.13.2.to_k.weight": { "min": -0.38104259967803955, "max": 0.2459549903869629, "mean": -9.848581612459384e-06, "std": 0.03276371210813522, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.13.2.to_k.bias": { "min": -3.655487537384033, "max": 3.2897744178771973, "mean": -0.01425144076347351, "std": 0.985081136226654, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.13.2.to_v.weight": { "min": -0.23475398123264313, "max": 0.24735963344573975, "mean": -1.814730239857454e-05, "std": 0.041698258370161057, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.13.2.to_v.bias": { "min": -0.07251452654600143, "max": 0.15445762872695923, "mean": 0.0006656228797510266, "std": 0.0251647736877203, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.13.2.to_out.0.weight": { "min": -0.26630881428718567, "max": 0.2481267750263214, "mean": -1.5170076949289069e-05, "std": 0.0401393324136734, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.13.2.to_out.0.bias": { "min": -0.1895921230316162, "max": 0.19462409615516663, "mean": -0.001237674499861896, "std": 0.06668463349342346, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.13.3.g": { "min": 0.32920053601264954, "max": 0.999627411365509, "mean": 0.7191565632820129, "std": 0.052332233637571335, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.13.4.ff.0.0.weight": { "min": -0.23170752823352814, "max": 0.24531398713588715, "mean": 0.00018265214748680592, "std": 0.040900230407714844, "sparsity": 0.0, "shape": [ 4096, 1024 ] }, "transformer.layers.13.4.ff.0.0.bias": { "min": -0.11451739817857742, "max": 0.019039874896407127, "mean": -0.0424770824611187, "std": 0.018864724785089493, "sparsity": 0.0, "shape": [ 4096 ] }, "transformer.layers.13.4.ff.2.weight": { "min": -0.38964730501174927, "max": 0.40745288133621216, "mean": -2.1833995560882613e-05, "std": 0.0485333576798439, "sparsity": 0.0, "shape": [ 1024, 4096 ] }, "transformer.layers.13.4.ff.2.bias": { "min": -0.6929526925086975, "max": 0.4126836955547333, "mean": 0.0008477572700940073, "std": 0.060282669961452484, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.14.0.weight": { "min": -0.0013933395966887474, "max": 1.000746726989746, "mean": 0.00048820103984326124, "std": 0.022089513018727303, "sparsity": 0.0, "shape": [ 1024, 2048 ] }, "transformer.layers.14.1.g": { "min": 0.9992843866348267, "max": 1.001552939414978, "mean": 1.0000746250152588, "std": 0.0006248687277548015, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.14.2.to_q.weight": { "min": -0.03125389292836189, "max": 0.03125779330730438, "mean": -2.1020408894401044e-05, "std": 0.01803232543170452, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.14.2.to_q.bias": { "min": -0.031215354800224304, "max": 0.031232187524437904, "mean": -0.0006770011968910694, "std": 0.017826862633228302, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.14.2.to_k.weight": { "min": -0.03125695139169693, "max": 0.03126237541437149, "mean": -8.831485502014402e-06, "std": 0.018031351268291473, "sparsity": 9.5367431640625e-07, "shape": [ 1024, 1024 ] }, "transformer.layers.14.2.to_k.bias": { "min": -0.03123210370540619, "max": 0.03124479576945305, "mean": -0.0007297537522390485, "std": 0.017941787838935852, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.14.2.to_v.weight": { "min": 0.0, "max": 0.0, "mean": 0.0, "std": 0.0, "sparsity": 1.0, "shape": [ 1024, 1024 ] }, "transformer.layers.14.2.to_v.bias": { "min": 0.0, "max": 0.0, "mean": 0.0, "std": 0.0, "sparsity": 1.0, "shape": [ 1024 ] }, "transformer.layers.14.2.to_out.0.weight": { "min": 0.0, "max": 0.0, "mean": 0.0, "std": 0.0, "sparsity": 1.0, "shape": [ 1024, 1024 ] }, "transformer.layers.14.2.to_out.0.bias": { "min": -0.0005147741758264601, "max": 0.00041916739428415895, "mean": -4.1531684473739006e-06, "std": 0.0001558788208058104, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.14.3.g": { "min": 0.997329831123352, "max": 1.0023579597473145, "mean": 0.9995578527450562, "std": 0.0008328193798661232, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.14.4.ff.0.0.weight": { "min": -0.033257633447647095, "max": 0.03283705189824104, "mean": -2.9398686365311733e-06, "std": 0.01802799478173256, "sparsity": 0.0, "shape": [ 4096, 1024 ] }, "transformer.layers.14.4.ff.0.0.bias": { "min": -0.0324481800198555, "max": 0.03130009397864342, "mean": -0.000511951744556427, "std": 0.01803583651781082, "sparsity": 0.0, "shape": [ 4096 ] }, "transformer.layers.14.4.ff.2.weight": { "min": -0.0017112370114773512, "max": 0.0015153783606365323, "mean": -1.2167475915703108e-06, "std": 0.00028721734997816384, "sparsity": 0.0, "shape": [ 1024, 4096 ] }, "transformer.layers.14.4.ff.2.bias": { "min": -0.00046955313882790506, "max": 0.0003882118908222765, "mean": -3.8059165490267333e-06, "std": 0.00014281016774475574, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.15.0.weight": { "min": -0.23431308567523956, "max": 0.2725020945072174, "mean": 6.621908141823951e-06, "std": 0.018810350447893143, "sparsity": 0.0, "shape": [ 1024, 2048 ] }, "transformer.layers.15.1.g": { "min": 0.32144924998283386, "max": 0.6939579248428345, "mean": 0.5816149711608887, "std": 0.045937687158584595, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.15.2.to_q.weight": { "min": -0.18192073702812195, "max": 0.1977624148130417, "mean": -1.1576559700188227e-05, "std": 0.03318417817354202, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.15.2.to_q.bias": { "min": -0.16049131751060486, "max": 0.1293114274740219, "mean": -0.00107291666790843, "std": 0.03413516655564308, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.15.2.to_k.weight": { "min": -0.3323962688446045, "max": 0.31116873025894165, "mean": -1.0262579962727614e-05, "std": 0.03223471716046333, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.15.2.to_k.bias": { "min": -7.802563190460205, "max": 8.761749267578125, "mean": 0.09345458447933197, "std": 1.6194684505462646, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.15.2.to_v.weight": { "min": -0.23397405445575714, "max": 0.2418195903301239, "mean": 4.162176628597081e-05, "std": 0.04085618257522583, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.15.2.to_v.bias": { "min": -0.07595669478178024, "max": 0.0657576471567154, "mean": 0.00048221880570054054, "std": 0.019416553899645805, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.15.2.to_out.0.weight": { "min": -0.2459147870540619, "max": 0.23389238119125366, "mean": -3.2510670280316845e-06, "std": 0.03943093866109848, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.15.2.to_out.0.bias": { "min": -0.1629837304353714, "max": 0.16088047623634338, "mean": 0.0016233830247074366, "std": 0.06528986245393753, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.15.3.g": { "min": 0.5571612119674683, "max": 0.9436106085777283, "mean": 0.7128171324729919, "std": 0.04012364149093628, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.15.4.ff.0.0.weight": { "min": -0.22801116108894348, "max": 0.2548006474971771, "mean": -4.5571337977889925e-05, "std": 0.04057438299059868, "sparsity": 0.0, "shape": [ 4096, 1024 ] }, "transformer.layers.15.4.ff.0.0.bias": { "min": -0.13471974432468414, "max": 0.0221097432076931, "mean": -0.041352279484272, "std": 0.01838749460875988, "sparsity": 0.0, "shape": [ 4096 ] }, "transformer.layers.15.4.ff.2.weight": { "min": -0.42162591218948364, "max": 0.3923877477645874, "mean": -4.321471351431683e-06, "std": 0.04778357967734337, "sparsity": 0.0, "shape": [ 1024, 4096 ] }, "transformer.layers.15.4.ff.2.bias": { "min": -0.6071884632110596, "max": 0.651282787322998, "mean": 0.0015848546754568815, "std": 0.0568372942507267, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.16.0.weight": { "min": -0.25181877613067627, "max": 0.32084232568740845, "mean": -6.161948476801626e-06, "std": 0.019613562151789665, "sparsity": 0.0, "shape": [ 1024, 2048 ] }, "transformer.layers.16.1.g": { "min": 0.35955217480659485, "max": 0.6821547150611877, "mean": 0.5706839561462402, "std": 0.0429888591170311, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.16.2.to_q.weight": { "min": -0.22016532719135284, "max": 0.17702604830265045, "mean": -3.4450480598025024e-05, "std": 0.034298721700906754, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.16.2.to_q.bias": { "min": -0.1631413698196411, "max": 0.23277200758457184, "mean": 0.000363422412192449, "std": 0.032813675701618195, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.16.2.to_k.weight": { "min": -0.2639073431491852, "max": 0.2398279309272766, "mean": -5.2961986511945724e-05, "std": 0.033897411078214645, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.16.2.to_k.bias": { "min": -4.854308605194092, "max": 5.090536117553711, "mean": 0.04387902468442917, "std": 1.2290979623794556, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.16.2.to_v.weight": { "min": -0.24643683433532715, "max": 0.2503347098827362, "mean": 7.216692029032856e-05, "std": 0.04398633539676666, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.16.2.to_v.bias": { "min": -0.06248769536614418, "max": 0.05441384017467499, "mean": 0.0006457050913013518, "std": 0.017188573256134987, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.16.2.to_out.0.weight": { "min": -0.2864288091659546, "max": 0.2721114456653595, "mean": -5.008514563087374e-05, "std": 0.04298446327447891, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.16.2.to_out.0.bias": { "min": -0.16100545227527618, "max": 0.170342355966568, "mean": -0.0028870203532278538, "std": 0.059300076216459274, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.16.3.g": { "min": 0.5198097229003906, "max": 0.9330063462257385, "mean": 0.7133984565734863, "std": 0.03842313215136528, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.16.4.ff.0.0.weight": { "min": -0.23787352442741394, "max": 0.24874305725097656, "mean": 0.0004645891021937132, "std": 0.04045315086841583, "sparsity": 0.0, "shape": [ 4096, 1024 ] }, "transformer.layers.16.4.ff.0.0.bias": { "min": -0.14499974250793457, "max": 0.04109013453125954, "mean": -0.039695803076028824, "std": 0.020541805773973465, "sparsity": 0.0, "shape": [ 4096 ] }, "transformer.layers.16.4.ff.2.weight": { "min": -0.5323729515075684, "max": 0.5824694633483887, "mean": 5.902071279706433e-06, "std": 0.04885893687605858, "sparsity": 0.0, "shape": [ 1024, 4096 ] }, "transformer.layers.16.4.ff.2.bias": { "min": -0.5189845561981201, "max": 0.4933343231678009, "mean": 0.0023664908949285746, "std": 0.05344504490494728, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.17.0.weight": { "min": -0.2737047076225281, "max": 0.31558480858802795, "mean": 1.935944737851969e-06, "std": 0.020050112158060074, "sparsity": 0.0, "shape": [ 1024, 2048 ] }, "transformer.layers.17.1.g": { "min": 0.3658909797668457, "max": 0.7117034196853638, "mean": 0.5931328535079956, "std": 0.04596179351210594, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.17.2.to_q.weight": { "min": -0.2108193188905716, "max": 0.1990451216697693, "mean": 3.062548057641834e-05, "std": 0.034867268055677414, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.17.2.to_q.bias": { "min": -0.18712614476680756, "max": 0.20343470573425293, "mean": 0.0009520579478703439, "std": 0.031497176736593246, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.17.2.to_k.weight": { "min": -0.2896800935268402, "max": 0.3398098945617676, "mean": -4.6883709728717804e-05, "std": 0.03458770364522934, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.17.2.to_k.bias": { "min": -3.8768599033355713, "max": 3.3869552612304688, "mean": 0.014455841854214668, "std": 0.8583106398582458, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.17.2.to_v.weight": { "min": -0.22448983788490295, "max": 0.24981370568275452, "mean": -3.890434527420439e-06, "std": 0.042229313403367996, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.17.2.to_v.bias": { "min": -0.05526347830891609, "max": 0.046524014323949814, "mean": -2.1809362806379795e-05, "std": 0.01583988219499588, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.17.2.to_out.0.weight": { "min": -0.2933104932308197, "max": 0.29035091400146484, "mean": -7.618443305545952e-06, "std": 0.04194440320134163, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.17.2.to_out.0.bias": { "min": -0.124831423163414, "max": 0.25899115204811096, "mean": -0.0032436971087008715, "std": 0.05317322164773941, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.17.3.g": { "min": 0.45623838901519775, "max": 0.844422459602356, "mean": 0.7054718732833862, "std": 0.03522763401269913, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.17.4.ff.0.0.weight": { "min": -0.5120505094528198, "max": 0.3482021689414978, "mean": 0.00034296896774321795, "std": 0.04019856080412865, "sparsity": 0.0, "shape": [ 4096, 1024 ] }, "transformer.layers.17.4.ff.0.0.bias": { "min": -0.18573501706123352, "max": 0.03954247012734413, "mean": -0.039387013763189316, "std": 0.02136080153286457, "sparsity": 0.0, "shape": [ 4096 ] }, "transformer.layers.17.4.ff.2.weight": { "min": -0.543980062007904, "max": 0.5556398034095764, "mean": -7.12752080289647e-05, "std": 0.050733935087919235, "sparsity": 0.0, "shape": [ 1024, 4096 ] }, "transformer.layers.17.4.ff.2.bias": { "min": -0.5116539001464844, "max": 0.6641847491264343, "mean": 0.0024422036949545145, "std": 0.049520041793584824, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.18.0.weight": { "min": -0.3325117230415344, "max": 0.2653426229953766, "mean": 3.3086610073951306e-06, "std": 0.019387137144804, "sparsity": 0.0, "shape": [ 1024, 2048 ] }, "transformer.layers.18.1.g": { "min": 0.3219893276691437, "max": 0.7664631009101868, "mean": 0.6510411500930786, "std": 0.04532777890563011, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.18.2.to_q.weight": { "min": -0.2498156577348709, "max": 0.2198626697063446, "mean": -1.886132849904243e-06, "std": 0.03650164604187012, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.18.2.to_q.bias": { "min": -0.32695695757865906, "max": 0.2867416441440582, "mean": -0.000684951723087579, "std": 0.03855687379837036, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.18.2.to_k.weight": { "min": -0.31001296639442444, "max": 0.3700636327266693, "mean": 6.516962457681075e-05, "std": 0.036242250353097916, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.18.2.to_k.bias": { "min": -4.716774940490723, "max": 5.807016372680664, "mean": 0.03795425221323967, "std": 1.4130064249038696, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.18.2.to_v.weight": { "min": -0.22152067720890045, "max": 0.20586349070072174, "mean": -7.513246237067506e-05, "std": 0.042484886944293976, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.18.2.to_v.bias": { "min": -0.0776548758149147, "max": 0.05150791257619858, "mean": -0.0009258093778043985, "std": 0.016412504017353058, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.18.2.to_out.0.weight": { "min": -0.33054521679878235, "max": 0.32925283908843994, "mean": -4.675353011407424e-06, "std": 0.042791180312633514, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.18.2.to_out.0.bias": { "min": -0.284753680229187, "max": 0.1120273545384407, "mean": -0.0012038055574521422, "std": 0.04701421782374382, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.18.3.g": { "min": 0.4860539734363556, "max": 0.8868206739425659, "mean": 0.7373669743537903, "std": 0.03824283927679062, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.18.4.ff.0.0.weight": { "min": -0.362324595451355, "max": 0.27455514669418335, "mean": 5.109608173370361e-05, "std": 0.04064401239156723, "sparsity": 0.0, "shape": [ 4096, 1024 ] }, "transformer.layers.18.4.ff.0.0.bias": { "min": -0.24754445254802704, "max": 0.046375077217817307, "mean": -0.039263028651475906, "std": 0.02328905090689659, "sparsity": 0.0, "shape": [ 4096 ] }, "transformer.layers.18.4.ff.2.weight": { "min": -0.6261394023895264, "max": 0.5965179204940796, "mean": -5.992384103592485e-05, "std": 0.053116101771593094, "sparsity": 0.0, "shape": [ 1024, 4096 ] }, "transformer.layers.18.4.ff.2.bias": { "min": -0.7094439268112183, "max": 0.2657933533191681, "mean": 0.000917100696824491, "std": 0.05122515559196472, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.19.0.weight": { "min": -0.3433791399002075, "max": 0.30369648337364197, "mean": 2.4011274035729e-07, "std": 0.019135721027851105, "sparsity": 0.0, "shape": [ 1024, 2048 ] }, "transformer.layers.19.1.g": { "min": 0.34975123405456543, "max": 0.7829355597496033, "mean": 0.6388096809387207, "std": 0.049248941242694855, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.19.2.to_q.weight": { "min": -0.20544706284999847, "max": 0.20679640769958496, "mean": -5.99185805185698e-05, "std": 0.037696123123168945, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.19.2.to_q.bias": { "min": -0.2586185336112976, "max": 0.2680370807647705, "mean": -0.00040146420360542834, "std": 0.04459588602185249, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.19.2.to_k.weight": { "min": -0.3540765345096588, "max": 0.3223837912082672, "mean": -6.969309197302209e-06, "std": 0.03720474615693092, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.19.2.to_k.bias": { "min": -5.260976791381836, "max": 4.204005241394043, "mean": -0.026412418112158775, "std": 1.0066431760787964, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.19.2.to_v.weight": { "min": -0.23861557245254517, "max": 0.24334679543972015, "mean": -2.5082641514018178e-05, "std": 0.04320957139134407, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.19.2.to_v.bias": { "min": -0.06232341378927231, "max": 0.056674133986234665, "mean": 0.0003426429466344416, "std": 0.01415110845118761, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.19.2.to_out.0.weight": { "min": -0.43692541122436523, "max": 0.37342891097068787, "mean": 1.4435072444030084e-05, "std": 0.04412085935473442, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.19.2.to_out.0.bias": { "min": -0.09643299132585526, "max": 0.17589901387691498, "mean": -0.0006592142744921148, "std": 0.03515716642141342, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.19.3.g": { "min": 0.4216461777687073, "max": 1.0694262981414795, "mean": 0.7483195662498474, "std": 0.04205932468175888, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.19.4.ff.0.0.weight": { "min": -0.2665816843509674, "max": 0.2969212532043457, "mean": -7.953966996865347e-05, "std": 0.04080412909388542, "sparsity": 0.0, "shape": [ 4096, 1024 ] }, "transformer.layers.19.4.ff.0.0.bias": { "min": -0.1857525259256363, "max": 0.043901920318603516, "mean": -0.036818623542785645, "std": 0.025608688592910767, "sparsity": 0.0, "shape": [ 4096 ] }, "transformer.layers.19.4.ff.2.weight": { "min": -0.4569249451160431, "max": 0.4865773022174835, "mean": 4.3881707824766636e-05, "std": 0.05420896038413048, "sparsity": 0.0, "shape": [ 1024, 4096 ] }, "transformer.layers.19.4.ff.2.bias": { "min": -0.28651300072669983, "max": 0.5512722134590149, "mean": -0.00088057282846421, "std": 0.04782658815383911, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.20.0.weight": { "min": -0.292865514755249, "max": 0.32280707359313965, "mean": 6.539526111737359e-06, "std": 0.019969915971159935, "sparsity": 0.0, "shape": [ 1024, 2048 ] }, "transformer.layers.20.1.g": { "min": 0.2909410893917084, "max": 0.7601442337036133, "mean": 0.6508233547210693, "std": 0.05213604494929314, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.20.2.to_q.weight": { "min": -0.2434738278388977, "max": 0.2616451680660248, "mean": -6.040764219505945e-06, "std": 0.03961297869682312, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.20.2.to_q.bias": { "min": -0.2675459682941437, "max": 0.1998538225889206, "mean": -0.0008808721322566271, "std": 0.05175367370247841, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.20.2.to_k.weight": { "min": -0.2721429765224457, "max": 0.25373363494873047, "mean": 4.028795956401154e-06, "std": 0.03871006891131401, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.20.2.to_k.bias": { "min": -12.963708877563477, "max": 15.945626258850098, "mean": 0.03322511166334152, "std": 1.988985300064087, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.20.2.to_v.weight": { "min": -0.2071155309677124, "max": 0.22583135962486267, "mean": -7.227471360238269e-05, "std": 0.04055366292595863, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.20.2.to_v.bias": { "min": -0.06934842467308044, "max": 0.06322810798883438, "mean": 0.00015266213449649513, "std": 0.01474202610552311, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.20.2.to_out.0.weight": { "min": -0.46502697467803955, "max": 0.32068270444869995, "mean": 1.9500737835187465e-05, "std": 0.0405886135995388, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.20.2.to_out.0.bias": { "min": -0.06406750530004501, "max": 0.1152099147439003, "mean": 0.0011921885889023542, "std": 0.0247051939368248, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.20.3.g": { "min": 0.37462663650512695, "max": 0.9322708249092102, "mean": 0.7508515119552612, "std": 0.040188200771808624, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.20.4.ff.0.0.weight": { "min": -0.27930018305778503, "max": 0.2731732130050659, "mean": -0.00016858182789292186, "std": 0.040994688868522644, "sparsity": 0.0, "shape": [ 4096, 1024 ] }, "transformer.layers.20.4.ff.0.0.bias": { "min": -0.19882012903690338, "max": 0.05084774270653725, "mean": -0.03202420845627785, "std": 0.025111209601163864, "sparsity": 0.0, "shape": [ 4096 ] }, "transformer.layers.20.4.ff.2.weight": { "min": -0.6573402285575867, "max": 0.5352922677993774, "mean": -4.871936471317895e-05, "std": 0.05284557491540909, "sparsity": 0.0, "shape": [ 1024, 4096 ] }, "transformer.layers.20.4.ff.2.bias": { "min": -0.1931021511554718, "max": 0.5820591449737549, "mean": -0.0005149454809725285, "std": 0.04106936603784561, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.21.0.weight": { "min": -0.4177095592021942, "max": 0.37194108963012695, "mean": 6.037503226252738e-06, "std": 0.021621696650981903, "sparsity": 0.0, "shape": [ 1024, 2048 ] }, "transformer.layers.21.1.g": { "min": 0.21426498889923096, "max": 0.7471067905426025, "mean": 0.6495591998100281, "std": 0.05437273159623146, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.21.2.to_q.weight": { "min": -0.20954997837543488, "max": 0.19577716290950775, "mean": 4.0040544263320044e-05, "std": 0.03946496173739433, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.21.2.to_q.bias": { "min": -0.3292751908302307, "max": 0.25935792922973633, "mean": -0.003224420826882124, "std": 0.05625506490468979, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.21.2.to_k.weight": { "min": -0.2056337594985962, "max": 0.25471389293670654, "mean": 5.435157800093293e-05, "std": 0.038567062467336655, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.21.2.to_k.bias": { "min": -6.24283504486084, "max": 6.9316864013671875, "mean": 0.048334453254938126, "std": 1.3849503993988037, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.21.2.to_v.weight": { "min": -0.20960700511932373, "max": 0.23016247153282166, "mean": -5.2383575166459195e-06, "std": 0.04131292924284935, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.21.2.to_v.bias": { "min": -0.043877486139535904, "max": 0.035942550748586655, "mean": 4.677800461649895e-06, "std": 0.012800506316125393, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.21.2.to_out.0.weight": { "min": -0.39784368872642517, "max": 0.3448275029659271, "mean": -5.554455128731206e-05, "std": 0.04238935187458992, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.21.2.to_out.0.bias": { "min": -0.05505242943763733, "max": 0.06286512315273285, "mean": 0.0003699597145896405, "std": 0.018672524020075798, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.21.3.g": { "min": 0.3501029312610626, "max": 1.0451030731201172, "mean": 0.7893401980400085, "std": 0.04874471575021744, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.21.4.ff.0.0.weight": { "min": -0.3334510326385498, "max": 0.38586220145225525, "mean": -0.0001694880920695141, "std": 0.041480448096990585, "sparsity": 0.0, "shape": [ 4096, 1024 ] }, "transformer.layers.21.4.ff.0.0.bias": { "min": -0.15723954141139984, "max": 0.05913884937763214, "mean": -0.031833715736866, "std": 0.025140652433037758, "sparsity": 0.0, "shape": [ 4096 ] }, "transformer.layers.21.4.ff.2.weight": { "min": -0.6964147090911865, "max": 0.4686952233314514, "mean": -9.150124969892204e-05, "std": 0.05179166793823242, "sparsity": 0.0, "shape": [ 1024, 4096 ] }, "transformer.layers.21.4.ff.2.bias": { "min": -0.24826228618621826, "max": 0.32854214310646057, "mean": -0.00024761329405009747, "std": 0.0414327047765255, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.22.0.weight": { "min": -0.2872432768344879, "max": 0.35023465752601624, "mean": -2.1361338440328836e-06, "std": 0.024239059537649155, "sparsity": 0.0, "shape": [ 1024, 2048 ] }, "transformer.layers.22.1.g": { "min": 0.19656625390052795, "max": 0.7792166471481323, "mean": 0.6702941060066223, "std": 0.058692529797554016, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.22.2.to_q.weight": { "min": -0.22861525416374207, "max": 0.23119905591011047, "mean": -1.981826062547043e-05, "std": 0.04044099524617195, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.22.2.to_q.bias": { "min": -0.21965257823467255, "max": 0.24067652225494385, "mean": 0.0007787347421981394, "std": 0.05579977110028267, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.22.2.to_k.weight": { "min": -0.215622216463089, "max": 0.22666674852371216, "mean": -7.155455386964604e-05, "std": 0.03937716409564018, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.22.2.to_k.bias": { "min": -8.904394149780273, "max": 9.067266464233398, "mean": -0.001250309869647026, "std": 1.8481073379516602, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.22.2.to_v.weight": { "min": -0.2693168520927429, "max": 0.25895655155181885, "mean": 4.356484714662656e-05, "std": 0.038407694548368454, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.22.2.to_v.bias": { "min": -0.05762461572885513, "max": 0.057689178735017776, "mean": 0.00034963880898430943, "std": 0.014724270440638065, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.22.2.to_out.0.weight": { "min": -0.2649986743927002, "max": 0.28868991136550903, "mean": -6.175809539854527e-05, "std": 0.039074063301086426, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.22.2.to_out.0.bias": { "min": -0.043768905103206635, "max": 0.0373171903192997, "mean": -8.572106889914721e-05, "std": 0.013365655206143856, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.22.3.g": { "min": 0.3394976556301117, "max": 1.0926626920700073, "mean": 0.86370849609375, "std": 0.06385412812232971, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.22.4.ff.0.0.weight": { "min": -0.42326879501342773, "max": 0.419196218252182, "mean": 0.00031274266075342894, "std": 0.043502915650606155, "sparsity": 0.0, "shape": [ 4096, 1024 ] }, "transformer.layers.22.4.ff.0.0.bias": { "min": -0.21476341784000397, "max": 0.17061911523342133, "mean": -0.029481371864676476, "std": 0.031948987394571304, "sparsity": 0.0, "shape": [ 4096 ] }, "transformer.layers.22.4.ff.2.weight": { "min": -0.5996708869934082, "max": 0.5596612691879272, "mean": -0.00015256566985044628, "std": 0.053446218371391296, "sparsity": 0.0, "shape": [ 1024, 4096 ] }, "transformer.layers.22.4.ff.2.bias": { "min": -0.17847125232219696, "max": 0.3766724169254303, "mean": 0.0013643248239532113, "std": 0.037309642881155014, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.23.0.weight": { "min": -0.39427170157432556, "max": 0.3689534664154053, "mean": 3.643418676801957e-05, "std": 0.028621334582567215, "sparsity": 0.0, "shape": [ 1024, 2048 ] }, "transformer.layers.23.1.g": { "min": 0.2903065085411072, "max": 0.826573371887207, "mean": 0.7055738568305969, "std": 0.06789194792509079, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.23.2.to_q.weight": { "min": -0.9261522889137268, "max": 1.0264601707458496, "mean": -2.5637811631895602e-05, "std": 0.047625649720430374, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.23.2.to_q.bias": { "min": -0.8783160448074341, "max": 0.8149734735488892, "mean": -0.00031416097772307694, "std": 0.09553803503513336, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.23.2.to_k.weight": { "min": -0.2693849802017212, "max": 0.24096263945102692, "mean": -2.2922044081497006e-05, "std": 0.03895637020468712, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.23.2.to_k.bias": { "min": -23.73985481262207, "max": 22.84831428527832, "mean": -0.09187203645706177, "std": 4.069868564605713, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.23.2.to_v.weight": { "min": -0.227765753865242, "max": 0.24508675932884216, "mean": -2.5811230443650857e-05, "std": 0.03863935545086861, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.23.2.to_v.bias": { "min": -0.06041998043656349, "max": 0.046056248247623444, "mean": -0.00014605963951908052, "std": 0.014698919840157032, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.23.2.to_out.0.weight": { "min": -0.33846333622932434, "max": 0.3745211064815521, "mean": 7.246726454468444e-06, "std": 0.04081542044878006, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.23.2.to_out.0.bias": { "min": -0.0464671291410923, "max": 0.1957084834575653, "mean": 0.0002726902603171766, "std": 0.013569602742791176, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.23.3.g": { "min": 0.3744957149028778, "max": 1.1300216913223267, "mean": 0.8900200724601746, "std": 0.06398579478263855, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.23.4.ff.0.0.weight": { "min": -0.4477945864200592, "max": 0.5424723625183105, "mean": 2.4591532564954832e-05, "std": 0.04556761309504509, "sparsity": 0.0, "shape": [ 4096, 1024 ] }, "transformer.layers.23.4.ff.0.0.bias": { "min": -0.22407397627830505, "max": 0.08826831728219986, "mean": -0.03201541677117348, "std": 0.03776346147060394, "sparsity": 0.0, "shape": [ 4096 ] }, "transformer.layers.23.4.ff.2.weight": { "min": -0.7253258228302002, "max": 0.6892617344856262, "mean": 3.4524080547271296e-05, "std": 0.05177822336554527, "sparsity": 0.0, "shape": [ 1024, 4096 ] }, "transformer.layers.23.4.ff.2.bias": { "min": -0.1745493859052658, "max": 0.21855643391609192, "mean": 4.002213245257735e-05, "std": 0.0317784883081913, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.24.0.weight": { "min": -0.3402628004550934, "max": 0.37424033880233765, "mean": 4.292904486646876e-05, "std": 0.03414493426680565, "sparsity": 0.0, "shape": [ 1024, 2048 ] }, "transformer.layers.24.1.g": { "min": 0.3175790011882782, "max": 1.2868926525115967, "mean": 0.6014685034751892, "std": 0.0834617167711258, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.24.2.to_q.weight": { "min": -0.28334787487983704, "max": 0.26021766662597656, "mean": -3.078439021919621e-06, "std": 0.03598484769463539, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.24.2.to_q.bias": { "min": -0.23551659286022186, "max": 0.20537099242210388, "mean": 0.0002320160565432161, "std": 0.056010857224464417, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.24.2.to_k.weight": { "min": -0.4354335069656372, "max": 0.3252001106739044, "mean": 2.4517319616279565e-05, "std": 0.03413575515151024, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.24.2.to_k.bias": { "min": -5.544912338256836, "max": 7.312640190124512, "mean": -0.007366480305790901, "std": 0.6992346048355103, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.24.2.to_v.weight": { "min": -0.343842089176178, "max": 0.36349090933799744, "mean": 0.0001033815206028521, "std": 0.04782803729176521, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.24.2.to_v.bias": { "min": -0.07375385612249374, "max": 0.06036338210105896, "mean": 0.0009326335857622325, "std": 0.014949528500437737, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.24.2.to_out.0.weight": { "min": -0.25554072856903076, "max": 0.28654900193214417, "mean": 4.4343978515826166e-06, "std": 0.041555255651474, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.24.2.to_out.0.bias": { "min": -0.05532766133546829, "max": 0.06282689422369003, "mean": 0.00014148413902148604, "std": 0.007174154743552208, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.24.3.g": { "min": 0.49368223547935486, "max": 1.2208430767059326, "mean": 1.0134273767471313, "std": 0.11743992567062378, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.24.4.ff.0.0.weight": { "min": -1.0936156511306763, "max": 1.0469433069229126, "mean": -4.977267235517502e-05, "std": 0.05241084843873978, "sparsity": 0.0, "shape": [ 4096, 1024 ] }, "transformer.layers.24.4.ff.0.0.bias": { "min": -0.22367312014102936, "max": 0.17280347645282745, "mean": -0.02724579907953739, "std": 0.03635029122233391, "sparsity": 0.0, "shape": [ 4096 ] }, "transformer.layers.24.4.ff.2.weight": { "min": -0.8845533132553101, "max": 0.9224876165390015, "mean": -0.000146063175634481, "std": 0.053282301872968674, "sparsity": 0.0, "shape": [ 1024, 4096 ] }, "transformer.layers.24.4.ff.2.bias": { "min": -0.17102308571338654, "max": 0.37991419434547424, "mean": 0.003368670353665948, "std": 0.03989797830581665, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.25.0.weight": { "min": -0.7772527933120728, "max": 0.7234945297241211, "mean": 1.913893902383279e-05, "std": 0.04616517201066017, "sparsity": 0.0, "shape": [ 1024, 2048 ] }, "transformer.layers.25.1.g": { "min": 0.3385581970214844, "max": 1.4277539253234863, "mean": 0.9483213424682617, "std": 0.20673882961273193, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.25.2.to_q.weight": { "min": -1.7455896139144897, "max": 1.7045435905456543, "mean": 0.00022695818915963173, "std": 0.15868604183197021, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.25.2.to_q.bias": { "min": -1.199622631072998, "max": 1.099592685699463, "mean": -0.00953536294400692, "std": 0.203833669424057, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.25.2.to_k.weight": { "min": -0.4213031232357025, "max": 0.42637819051742554, "mean": 6.450257205870003e-05, "std": 0.048018429428339005, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.25.2.to_k.bias": { "min": -19.743934631347656, "max": 19.539039611816406, "mean": -0.24830012023448944, "std": 4.776192665100098, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.25.2.to_v.weight": { "min": -0.32387086749076843, "max": 0.4384032189846039, "mean": -1.2015252650598995e-05, "std": 0.046161383390426636, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.25.2.to_v.bias": { "min": -0.0340605154633522, "max": 0.037125036120414734, "mean": 0.0006421188591048121, "std": 0.012921434827148914, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.25.2.to_out.0.weight": { "min": -0.703487753868103, "max": 0.6645694375038147, "mean": 4.3493168050190434e-05, "std": 0.0578836165368557, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.25.2.to_out.0.bias": { "min": -0.0722307413816452, "max": 0.06750312447547913, "mean": -0.00013278273399919271, "std": 0.012919807806611061, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.25.3.g": { "min": 0.3801887333393097, "max": 1.3909631967544556, "mean": 1.0665581226348877, "std": 0.2197146713733673, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.25.4.ff.0.0.weight": { "min": -0.6164069175720215, "max": 0.7170259952545166, "mean": 0.00011130621714983135, "std": 0.058021292090415955, "sparsity": 0.0, "shape": [ 4096, 1024 ] }, "transformer.layers.25.4.ff.0.0.bias": { "min": -0.21958374977111816, "max": 0.2251792550086975, "mean": 0.0062429094687104225, "std": 0.04972800984978676, "sparsity": 0.0, "shape": [ 4096 ] }, "transformer.layers.25.4.ff.2.weight": { "min": -0.6296579241752625, "max": 0.8892135620117188, "mean": 1.1699157766997814e-05, "std": 0.023528022691607475, "sparsity": 0.0, "shape": [ 1024, 4096 ] }, "transformer.layers.25.4.ff.2.bias": { "min": -0.5068321824073792, "max": 0.4739873707294464, "mean": -0.003016006201505661, "std": 0.06930257380008698, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.norm_out.g": { "min": 0.5377203226089478, "max": 1.1807109117507935, "mean": 0.7827430367469788, "std": 0.09885811805725098, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.proj_out.weight": { "min": -0.2669532299041748, "max": 0.2126723825931549, "mean": -0.00022305321181192994, "std": 0.05399656668305397, "sparsity": 0.0, "shape": [ 100, 1024 ] }, "transformer.proj_out.bias": { "min": -0.23791296780109406, "max": 0.014832733199000359, "mean": -0.04395970329642296, "std": 0.03433232381939888, "sparsity": 0.0, "shape": [ 100 ] } } }