{ "layer_types": { "transformer": 391 }, "parameter_counts": { "transformer.time_embed.time_mlp.0.weight": 262144, "transformer.time_embed.time_mlp.0.bias": 1024, "transformer.time_embed.time_mlp.2.weight": 1048576, "transformer.time_embed.time_mlp.2.bias": 1024, "transformer.text_embed.text_embed.weight": 254600, "transformer.input_embed.proj.weight": 307200, "transformer.input_embed.proj.bias": 1024, "transformer.input_embed.conv_pos_embed.conv1d.0.weight": 2031616, "transformer.input_embed.conv_pos_embed.conv1d.0.bias": 1024, "transformer.input_embed.conv_pos_embed.conv1d.2.weight": 2031616, "transformer.input_embed.conv_pos_embed.conv1d.2.bias": 1024, "transformer.layers.0.1.g": 1024, "transformer.layers.0.2.to_q.weight": 1048576, "transformer.layers.0.2.to_q.bias": 1024, "transformer.layers.0.2.to_k.weight": 1048576, "transformer.layers.0.2.to_k.bias": 1024, "transformer.layers.0.2.to_v.weight": 1048576, "transformer.layers.0.2.to_v.bias": 1024, "transformer.layers.0.2.to_out.0.weight": 1048576, "transformer.layers.0.2.to_out.0.bias": 1024, "transformer.layers.0.3.g": 1024, "transformer.layers.0.4.ff.0.0.weight": 4194304, "transformer.layers.0.4.ff.0.0.bias": 4096, "transformer.layers.0.4.ff.2.weight": 4194304, "transformer.layers.0.4.ff.2.bias": 1024, "transformer.layers.1.1.g": 1024, "transformer.layers.1.2.to_q.weight": 1048576, "transformer.layers.1.2.to_q.bias": 1024, "transformer.layers.1.2.to_k.weight": 1048576, "transformer.layers.1.2.to_k.bias": 1024, "transformer.layers.1.2.to_v.weight": 1048576, "transformer.layers.1.2.to_v.bias": 1024, "transformer.layers.1.2.to_out.0.weight": 1048576, "transformer.layers.1.2.to_out.0.bias": 1024, "transformer.layers.1.3.g": 1024, "transformer.layers.1.4.ff.0.0.weight": 4194304, "transformer.layers.1.4.ff.0.0.bias": 4096, "transformer.layers.1.4.ff.2.weight": 4194304, "transformer.layers.1.4.ff.2.bias": 1024, "transformer.layers.2.1.g": 1024, "transformer.layers.2.2.to_q.weight": 1048576, "transformer.layers.2.2.to_q.bias": 1024, "transformer.layers.2.2.to_k.weight": 1048576, "transformer.layers.2.2.to_k.bias": 1024, "transformer.layers.2.2.to_v.weight": 1048576, "transformer.layers.2.2.to_v.bias": 1024, "transformer.layers.2.2.to_out.0.weight": 1048576, "transformer.layers.2.2.to_out.0.bias": 1024, "transformer.layers.2.3.g": 1024, "transformer.layers.2.4.ff.0.0.weight": 4194304, "transformer.layers.2.4.ff.0.0.bias": 4096, "transformer.layers.2.4.ff.2.weight": 4194304, "transformer.layers.2.4.ff.2.bias": 1024, "transformer.layers.3.1.g": 1024, "transformer.layers.3.2.to_q.weight": 1048576, "transformer.layers.3.2.to_q.bias": 1024, "transformer.layers.3.2.to_k.weight": 1048576, "transformer.layers.3.2.to_k.bias": 1024, "transformer.layers.3.2.to_v.weight": 1048576, "transformer.layers.3.2.to_v.bias": 1024, "transformer.layers.3.2.to_out.0.weight": 1048576, "transformer.layers.3.2.to_out.0.bias": 1024, "transformer.layers.3.3.g": 1024, "transformer.layers.3.4.ff.0.0.weight": 4194304, "transformer.layers.3.4.ff.0.0.bias": 4096, "transformer.layers.3.4.ff.2.weight": 4194304, "transformer.layers.3.4.ff.2.bias": 1024, "transformer.layers.4.1.g": 1024, "transformer.layers.4.2.to_q.weight": 1048576, "transformer.layers.4.2.to_q.bias": 1024, "transformer.layers.4.2.to_k.weight": 1048576, "transformer.layers.4.2.to_k.bias": 1024, "transformer.layers.4.2.to_v.weight": 1048576, "transformer.layers.4.2.to_v.bias": 1024, "transformer.layers.4.2.to_out.0.weight": 1048576, "transformer.layers.4.2.to_out.0.bias": 1024, "transformer.layers.4.3.g": 1024, "transformer.layers.4.4.ff.0.0.weight": 4194304, "transformer.layers.4.4.ff.0.0.bias": 4096, "transformer.layers.4.4.ff.2.weight": 4194304, "transformer.layers.4.4.ff.2.bias": 1024, "transformer.layers.5.1.g": 1024, "transformer.layers.5.2.to_q.weight": 1048576, "transformer.layers.5.2.to_q.bias": 1024, "transformer.layers.5.2.to_k.weight": 1048576, "transformer.layers.5.2.to_k.bias": 1024, "transformer.layers.5.2.to_v.weight": 1048576, "transformer.layers.5.2.to_v.bias": 1024, "transformer.layers.5.2.to_out.0.weight": 1048576, "transformer.layers.5.2.to_out.0.bias": 1024, "transformer.layers.5.3.g": 1024, "transformer.layers.5.4.ff.0.0.weight": 4194304, "transformer.layers.5.4.ff.0.0.bias": 4096, "transformer.layers.5.4.ff.2.weight": 4194304, "transformer.layers.5.4.ff.2.bias": 1024, "transformer.layers.6.1.g": 1024, "transformer.layers.6.2.to_q.weight": 1048576, "transformer.layers.6.2.to_q.bias": 1024, "transformer.layers.6.2.to_k.weight": 1048576, "transformer.layers.6.2.to_k.bias": 1024, "transformer.layers.6.2.to_v.weight": 1048576, "transformer.layers.6.2.to_v.bias": 1024, "transformer.layers.6.2.to_out.0.weight": 1048576, "transformer.layers.6.2.to_out.0.bias": 1024, "transformer.layers.6.3.g": 1024, "transformer.layers.6.4.ff.0.0.weight": 4194304, "transformer.layers.6.4.ff.0.0.bias": 4096, "transformer.layers.6.4.ff.2.weight": 4194304, "transformer.layers.6.4.ff.2.bias": 1024, "transformer.layers.7.1.g": 1024, "transformer.layers.7.2.to_q.weight": 1048576, "transformer.layers.7.2.to_q.bias": 1024, "transformer.layers.7.2.to_k.weight": 1048576, "transformer.layers.7.2.to_k.bias": 1024, "transformer.layers.7.2.to_v.weight": 1048576, "transformer.layers.7.2.to_v.bias": 1024, "transformer.layers.7.2.to_out.0.weight": 1048576, "transformer.layers.7.2.to_out.0.bias": 1024, "transformer.layers.7.3.g": 1024, "transformer.layers.7.4.ff.0.0.weight": 4194304, "transformer.layers.7.4.ff.0.0.bias": 4096, "transformer.layers.7.4.ff.2.weight": 4194304, "transformer.layers.7.4.ff.2.bias": 1024, "transformer.layers.8.1.g": 1024, "transformer.layers.8.2.to_q.weight": 1048576, "transformer.layers.8.2.to_q.bias": 1024, "transformer.layers.8.2.to_k.weight": 1048576, "transformer.layers.8.2.to_k.bias": 1024, "transformer.layers.8.2.to_v.weight": 1048576, "transformer.layers.8.2.to_v.bias": 1024, "transformer.layers.8.2.to_out.0.weight": 1048576, "transformer.layers.8.2.to_out.0.bias": 1024, "transformer.layers.8.3.g": 1024, "transformer.layers.8.4.ff.0.0.weight": 4194304, "transformer.layers.8.4.ff.0.0.bias": 4096, "transformer.layers.8.4.ff.2.weight": 4194304, "transformer.layers.8.4.ff.2.bias": 1024, "transformer.layers.9.1.g": 1024, "transformer.layers.9.2.to_q.weight": 1048576, "transformer.layers.9.2.to_q.bias": 1024, "transformer.layers.9.2.to_k.weight": 1048576, "transformer.layers.9.2.to_k.bias": 1024, "transformer.layers.9.2.to_v.weight": 1048576, "transformer.layers.9.2.to_v.bias": 1024, "transformer.layers.9.2.to_out.0.weight": 1048576, "transformer.layers.9.2.to_out.0.bias": 1024, "transformer.layers.9.3.g": 1024, "transformer.layers.9.4.ff.0.0.weight": 4194304, "transformer.layers.9.4.ff.0.0.bias": 4096, "transformer.layers.9.4.ff.2.weight": 4194304, "transformer.layers.9.4.ff.2.bias": 1024, "transformer.layers.10.1.g": 1024, "transformer.layers.10.2.to_q.weight": 1048576, "transformer.layers.10.2.to_q.bias": 1024, "transformer.layers.10.2.to_k.weight": 1048576, "transformer.layers.10.2.to_k.bias": 1024, "transformer.layers.10.2.to_v.weight": 1048576, "transformer.layers.10.2.to_v.bias": 1024, "transformer.layers.10.2.to_out.0.weight": 1048576, "transformer.layers.10.2.to_out.0.bias": 1024, "transformer.layers.10.3.g": 1024, "transformer.layers.10.4.ff.0.0.weight": 4194304, "transformer.layers.10.4.ff.0.0.bias": 4096, "transformer.layers.10.4.ff.2.weight": 4194304, "transformer.layers.10.4.ff.2.bias": 1024, "transformer.layers.11.1.g": 1024, "transformer.layers.11.2.to_q.weight": 1048576, "transformer.layers.11.2.to_q.bias": 1024, "transformer.layers.11.2.to_k.weight": 1048576, "transformer.layers.11.2.to_k.bias": 1024, "transformer.layers.11.2.to_v.weight": 1048576, "transformer.layers.11.2.to_v.bias": 1024, "transformer.layers.11.2.to_out.0.weight": 1048576, "transformer.layers.11.2.to_out.0.bias": 1024, "transformer.layers.11.3.g": 1024, "transformer.layers.11.4.ff.0.0.weight": 4194304, "transformer.layers.11.4.ff.0.0.bias": 4096, "transformer.layers.11.4.ff.2.weight": 4194304, "transformer.layers.11.4.ff.2.bias": 1024, "transformer.layers.12.1.g": 1024, "transformer.layers.12.2.to_q.weight": 1048576, "transformer.layers.12.2.to_q.bias": 1024, "transformer.layers.12.2.to_k.weight": 1048576, "transformer.layers.12.2.to_k.bias": 1024, "transformer.layers.12.2.to_v.weight": 1048576, "transformer.layers.12.2.to_v.bias": 1024, "transformer.layers.12.2.to_out.0.weight": 1048576, "transformer.layers.12.2.to_out.0.bias": 1024, "transformer.layers.12.3.g": 1024, "transformer.layers.12.4.ff.0.0.weight": 4194304, "transformer.layers.12.4.ff.0.0.bias": 4096, "transformer.layers.12.4.ff.2.weight": 4194304, "transformer.layers.12.4.ff.2.bias": 1024, "transformer.layers.13.0.weight": 2097152, "transformer.layers.13.1.g": 1024, "transformer.layers.13.2.to_q.weight": 1048576, "transformer.layers.13.2.to_q.bias": 1024, "transformer.layers.13.2.to_k.weight": 1048576, "transformer.layers.13.2.to_k.bias": 1024, "transformer.layers.13.2.to_v.weight": 1048576, "transformer.layers.13.2.to_v.bias": 1024, "transformer.layers.13.2.to_out.0.weight": 1048576, "transformer.layers.13.2.to_out.0.bias": 1024, "transformer.layers.13.3.g": 1024, "transformer.layers.13.4.ff.0.0.weight": 4194304, "transformer.layers.13.4.ff.0.0.bias": 4096, "transformer.layers.13.4.ff.2.weight": 4194304, "transformer.layers.13.4.ff.2.bias": 1024, "transformer.layers.14.0.weight": 2097152, "transformer.layers.14.1.g": 1024, "transformer.layers.14.2.to_q.weight": 1048576, "transformer.layers.14.2.to_q.bias": 1024, "transformer.layers.14.2.to_k.weight": 1048576, "transformer.layers.14.2.to_k.bias": 1024, "transformer.layers.14.2.to_v.weight": 1048576, "transformer.layers.14.2.to_v.bias": 1024, "transformer.layers.14.2.to_out.0.weight": 1048576, "transformer.layers.14.2.to_out.0.bias": 1024, "transformer.layers.14.3.g": 1024, "transformer.layers.14.4.ff.0.0.weight": 4194304, "transformer.layers.14.4.ff.0.0.bias": 4096, "transformer.layers.14.4.ff.2.weight": 4194304, "transformer.layers.14.4.ff.2.bias": 1024, "transformer.layers.15.0.weight": 2097152, "transformer.layers.15.1.g": 1024, "transformer.layers.15.2.to_q.weight": 1048576, "transformer.layers.15.2.to_q.bias": 1024, "transformer.layers.15.2.to_k.weight": 1048576, "transformer.layers.15.2.to_k.bias": 1024, "transformer.layers.15.2.to_v.weight": 1048576, "transformer.layers.15.2.to_v.bias": 1024, "transformer.layers.15.2.to_out.0.weight": 1048576, "transformer.layers.15.2.to_out.0.bias": 1024, "transformer.layers.15.3.g": 1024, "transformer.layers.15.4.ff.0.0.weight": 4194304, "transformer.layers.15.4.ff.0.0.bias": 4096, "transformer.layers.15.4.ff.2.weight": 4194304, "transformer.layers.15.4.ff.2.bias": 1024, "transformer.layers.16.0.weight": 2097152, "transformer.layers.16.1.g": 1024, "transformer.layers.16.2.to_q.weight": 1048576, "transformer.layers.16.2.to_q.bias": 1024, "transformer.layers.16.2.to_k.weight": 1048576, "transformer.layers.16.2.to_k.bias": 1024, "transformer.layers.16.2.to_v.weight": 1048576, "transformer.layers.16.2.to_v.bias": 1024, "transformer.layers.16.2.to_out.0.weight": 1048576, "transformer.layers.16.2.to_out.0.bias": 1024, "transformer.layers.16.3.g": 1024, "transformer.layers.16.4.ff.0.0.weight": 4194304, "transformer.layers.16.4.ff.0.0.bias": 4096, "transformer.layers.16.4.ff.2.weight": 4194304, "transformer.layers.16.4.ff.2.bias": 1024, "transformer.layers.17.0.weight": 2097152, "transformer.layers.17.1.g": 1024, "transformer.layers.17.2.to_q.weight": 1048576, "transformer.layers.17.2.to_q.bias": 1024, "transformer.layers.17.2.to_k.weight": 1048576, "transformer.layers.17.2.to_k.bias": 1024, "transformer.layers.17.2.to_v.weight": 1048576, "transformer.layers.17.2.to_v.bias": 1024, "transformer.layers.17.2.to_out.0.weight": 1048576, "transformer.layers.17.2.to_out.0.bias": 1024, "transformer.layers.17.3.g": 1024, "transformer.layers.17.4.ff.0.0.weight": 4194304, "transformer.layers.17.4.ff.0.0.bias": 4096, "transformer.layers.17.4.ff.2.weight": 4194304, "transformer.layers.17.4.ff.2.bias": 1024, "transformer.layers.18.0.weight": 2097152, "transformer.layers.18.1.g": 1024, "transformer.layers.18.2.to_q.weight": 1048576, "transformer.layers.18.2.to_q.bias": 1024, "transformer.layers.18.2.to_k.weight": 1048576, "transformer.layers.18.2.to_k.bias": 1024, "transformer.layers.18.2.to_v.weight": 1048576, "transformer.layers.18.2.to_v.bias": 1024, "transformer.layers.18.2.to_out.0.weight": 1048576, "transformer.layers.18.2.to_out.0.bias": 1024, "transformer.layers.18.3.g": 1024, "transformer.layers.18.4.ff.0.0.weight": 4194304, "transformer.layers.18.4.ff.0.0.bias": 4096, "transformer.layers.18.4.ff.2.weight": 4194304, "transformer.layers.18.4.ff.2.bias": 1024, "transformer.layers.19.0.weight": 2097152, "transformer.layers.19.1.g": 1024, "transformer.layers.19.2.to_q.weight": 1048576, "transformer.layers.19.2.to_q.bias": 1024, "transformer.layers.19.2.to_k.weight": 1048576, "transformer.layers.19.2.to_k.bias": 1024, "transformer.layers.19.2.to_v.weight": 1048576, "transformer.layers.19.2.to_v.bias": 1024, "transformer.layers.19.2.to_out.0.weight": 1048576, "transformer.layers.19.2.to_out.0.bias": 1024, "transformer.layers.19.3.g": 1024, "transformer.layers.19.4.ff.0.0.weight": 4194304, "transformer.layers.19.4.ff.0.0.bias": 4096, "transformer.layers.19.4.ff.2.weight": 4194304, "transformer.layers.19.4.ff.2.bias": 1024, "transformer.layers.20.0.weight": 2097152, "transformer.layers.20.1.g": 1024, "transformer.layers.20.2.to_q.weight": 1048576, "transformer.layers.20.2.to_q.bias": 1024, "transformer.layers.20.2.to_k.weight": 1048576, "transformer.layers.20.2.to_k.bias": 1024, "transformer.layers.20.2.to_v.weight": 1048576, "transformer.layers.20.2.to_v.bias": 1024, "transformer.layers.20.2.to_out.0.weight": 1048576, "transformer.layers.20.2.to_out.0.bias": 1024, "transformer.layers.20.3.g": 1024, "transformer.layers.20.4.ff.0.0.weight": 4194304, "transformer.layers.20.4.ff.0.0.bias": 4096, "transformer.layers.20.4.ff.2.weight": 4194304, "transformer.layers.20.4.ff.2.bias": 1024, "transformer.layers.21.0.weight": 2097152, "transformer.layers.21.1.g": 1024, "transformer.layers.21.2.to_q.weight": 1048576, "transformer.layers.21.2.to_q.bias": 1024, "transformer.layers.21.2.to_k.weight": 1048576, "transformer.layers.21.2.to_k.bias": 1024, "transformer.layers.21.2.to_v.weight": 1048576, "transformer.layers.21.2.to_v.bias": 1024, "transformer.layers.21.2.to_out.0.weight": 1048576, "transformer.layers.21.2.to_out.0.bias": 1024, "transformer.layers.21.3.g": 1024, "transformer.layers.21.4.ff.0.0.weight": 4194304, "transformer.layers.21.4.ff.0.0.bias": 4096, "transformer.layers.21.4.ff.2.weight": 4194304, "transformer.layers.21.4.ff.2.bias": 1024, "transformer.layers.22.0.weight": 2097152, "transformer.layers.22.1.g": 1024, "transformer.layers.22.2.to_q.weight": 1048576, "transformer.layers.22.2.to_q.bias": 1024, "transformer.layers.22.2.to_k.weight": 1048576, "transformer.layers.22.2.to_k.bias": 1024, "transformer.layers.22.2.to_v.weight": 1048576, "transformer.layers.22.2.to_v.bias": 1024, "transformer.layers.22.2.to_out.0.weight": 1048576, "transformer.layers.22.2.to_out.0.bias": 1024, "transformer.layers.22.3.g": 1024, "transformer.layers.22.4.ff.0.0.weight": 4194304, "transformer.layers.22.4.ff.0.0.bias": 4096, "transformer.layers.22.4.ff.2.weight": 4194304, "transformer.layers.22.4.ff.2.bias": 1024, "transformer.layers.23.0.weight": 2097152, "transformer.layers.23.1.g": 1024, "transformer.layers.23.2.to_q.weight": 1048576, "transformer.layers.23.2.to_q.bias": 1024, "transformer.layers.23.2.to_k.weight": 1048576, "transformer.layers.23.2.to_k.bias": 1024, "transformer.layers.23.2.to_v.weight": 1048576, "transformer.layers.23.2.to_v.bias": 1024, "transformer.layers.23.2.to_out.0.weight": 1048576, "transformer.layers.23.2.to_out.0.bias": 1024, "transformer.layers.23.3.g": 1024, "transformer.layers.23.4.ff.0.0.weight": 4194304, "transformer.layers.23.4.ff.0.0.bias": 4096, "transformer.layers.23.4.ff.2.weight": 4194304, "transformer.layers.23.4.ff.2.bias": 1024, "transformer.layers.24.0.weight": 2097152, "transformer.layers.24.1.g": 1024, "transformer.layers.24.2.to_q.weight": 1048576, "transformer.layers.24.2.to_q.bias": 1024, "transformer.layers.24.2.to_k.weight": 1048576, "transformer.layers.24.2.to_k.bias": 1024, "transformer.layers.24.2.to_v.weight": 1048576, "transformer.layers.24.2.to_v.bias": 1024, "transformer.layers.24.2.to_out.0.weight": 1048576, "transformer.layers.24.2.to_out.0.bias": 1024, "transformer.layers.24.3.g": 1024, "transformer.layers.24.4.ff.0.0.weight": 4194304, "transformer.layers.24.4.ff.0.0.bias": 4096, "transformer.layers.24.4.ff.2.weight": 4194304, "transformer.layers.24.4.ff.2.bias": 1024, "transformer.layers.25.0.weight": 2097152, "transformer.layers.25.1.g": 1024, "transformer.layers.25.2.to_q.weight": 1048576, "transformer.layers.25.2.to_q.bias": 1024, "transformer.layers.25.2.to_k.weight": 1048576, "transformer.layers.25.2.to_k.bias": 1024, "transformer.layers.25.2.to_v.weight": 1048576, "transformer.layers.25.2.to_v.bias": 1024, "transformer.layers.25.2.to_out.0.weight": 1048576, "transformer.layers.25.2.to_out.0.bias": 1024, "transformer.layers.25.3.g": 1024, "transformer.layers.25.4.ff.0.0.weight": 4194304, "transformer.layers.25.4.ff.0.0.bias": 4096, "transformer.layers.25.4.ff.2.weight": 4194304, "transformer.layers.25.4.ff.2.bias": 1024, "transformer.norm_out.g": 1024, "transformer.proj_out.weight": 102400, "transformer.proj_out.bias": 100 }, "important_layers": [ "transformer.time_embed.time_mlp.0.weight", "transformer.time_embed.time_mlp.2.weight", "transformer.text_embed.text_embed.weight", "transformer.input_embed.proj.weight", "transformer.input_embed.conv_pos_embed.conv1d.0.weight", "transformer.input_embed.conv_pos_embed.conv1d.2.weight", "transformer.layers.0.2.to_q.weight", "transformer.layers.0.2.to_k.weight", "transformer.layers.0.2.to_v.weight", "transformer.layers.0.2.to_out.0.weight", "transformer.layers.0.4.ff.0.0.weight", "transformer.layers.0.4.ff.2.weight", "transformer.layers.1.2.to_q.weight", "transformer.layers.1.2.to_k.weight", "transformer.layers.1.2.to_v.weight", "transformer.layers.1.2.to_out.0.weight", "transformer.layers.1.4.ff.0.0.weight", "transformer.layers.1.4.ff.2.weight", "transformer.layers.2.2.to_q.weight", "transformer.layers.2.2.to_k.weight", "transformer.layers.2.2.to_v.weight", "transformer.layers.2.2.to_out.0.weight", "transformer.layers.2.4.ff.0.0.weight", "transformer.layers.2.4.ff.2.weight", "transformer.layers.3.2.to_q.weight", "transformer.layers.3.2.to_k.weight", "transformer.layers.3.2.to_v.weight", "transformer.layers.3.2.to_out.0.weight", "transformer.layers.3.4.ff.0.0.weight", "transformer.layers.3.4.ff.2.weight", "transformer.layers.4.2.to_q.weight", "transformer.layers.4.2.to_k.weight", "transformer.layers.4.2.to_v.weight", "transformer.layers.4.2.to_out.0.weight", "transformer.layers.4.4.ff.0.0.weight", "transformer.layers.4.4.ff.2.weight", "transformer.layers.5.2.to_q.weight", "transformer.layers.5.2.to_k.weight", "transformer.layers.5.2.to_v.weight", "transformer.layers.5.2.to_out.0.weight", "transformer.layers.5.4.ff.0.0.weight", "transformer.layers.5.4.ff.2.weight", "transformer.layers.6.2.to_q.weight", "transformer.layers.6.2.to_k.weight", "transformer.layers.6.2.to_v.weight", "transformer.layers.6.2.to_out.0.weight", "transformer.layers.6.4.ff.0.0.weight", "transformer.layers.6.4.ff.2.weight", "transformer.layers.7.2.to_q.weight", "transformer.layers.7.2.to_k.weight", "transformer.layers.7.2.to_v.weight", "transformer.layers.7.2.to_out.0.weight", "transformer.layers.7.4.ff.0.0.weight", "transformer.layers.7.4.ff.2.weight", "transformer.layers.8.4.ff.0.0.weight", "transformer.layers.8.4.ff.2.weight", "transformer.layers.9.4.ff.0.0.weight", "transformer.layers.9.4.ff.2.weight", "transformer.layers.10.4.ff.0.0.weight", "transformer.layers.10.4.ff.2.weight", "transformer.layers.11.4.ff.0.0.weight", "transformer.layers.11.4.ff.2.weight", "transformer.layers.12.4.ff.0.0.weight", "transformer.layers.12.4.ff.2.weight", "transformer.layers.13.0.weight", "transformer.layers.13.4.ff.0.0.weight", "transformer.layers.13.4.ff.2.weight", "transformer.layers.14.0.weight", "transformer.layers.14.4.ff.0.0.weight", "transformer.layers.14.4.ff.2.weight", "transformer.layers.15.0.weight", "transformer.layers.15.4.ff.0.0.weight", "transformer.layers.15.4.ff.2.weight", "transformer.layers.16.4.ff.0.0.weight", "transformer.layers.16.4.ff.2.weight", "transformer.layers.17.4.ff.0.0.weight", "transformer.layers.17.4.ff.2.weight", "transformer.layers.18.4.ff.0.0.weight", "transformer.layers.18.4.ff.2.weight", "transformer.layers.19.4.ff.0.0.weight", "transformer.layers.19.4.ff.2.weight", "transformer.layers.20.4.ff.0.0.weight", "transformer.layers.20.4.ff.2.weight", "transformer.layers.21.4.ff.0.0.weight", "transformer.layers.21.4.ff.2.weight", "transformer.layers.22.4.ff.0.0.weight", "transformer.layers.22.4.ff.2.weight", "transformer.layers.23.4.ff.0.0.weight", "transformer.layers.23.4.ff.2.weight", "transformer.layers.24.4.ff.0.0.weight", "transformer.layers.24.4.ff.2.weight", "transformer.layers.25.4.ff.0.0.weight", "transformer.layers.25.4.ff.2.weight" ], "bottleneck_layers": [], "recommendations": { "focus_layers": [ "transformer.time_embed.time_mlp.0.weight", "transformer.time_embed.time_mlp.2.weight", "transformer.text_embed.text_embed.weight", "transformer.input_embed.proj.weight", "transformer.input_embed.conv_pos_embed.conv1d.0.weight", "transformer.input_embed.conv_pos_embed.conv1d.2.weight", "transformer.layers.0.2.to_q.weight", "transformer.layers.0.2.to_k.weight", "transformer.layers.0.2.to_v.weight", "transformer.layers.0.2.to_out.0.weight", "transformer.layers.0.4.ff.0.0.weight", "transformer.layers.0.4.ff.2.weight", "transformer.layers.1.2.to_q.weight", "transformer.layers.1.2.to_k.weight", "transformer.layers.1.2.to_v.weight", "transformer.layers.1.2.to_out.0.weight", "transformer.layers.1.4.ff.0.0.weight", "transformer.layers.1.4.ff.2.weight", "transformer.layers.2.2.to_q.weight", "transformer.layers.2.2.to_k.weight", "transformer.layers.2.2.to_v.weight", "transformer.layers.2.2.to_out.0.weight", "transformer.layers.2.4.ff.0.0.weight", "transformer.layers.2.4.ff.2.weight", "transformer.layers.3.2.to_q.weight", "transformer.layers.3.2.to_k.weight", "transformer.layers.3.2.to_v.weight", "transformer.layers.3.2.to_out.0.weight", "transformer.layers.3.4.ff.0.0.weight", "transformer.layers.3.4.ff.2.weight", "transformer.layers.4.2.to_q.weight", "transformer.layers.4.2.to_k.weight", "transformer.layers.4.2.to_v.weight", "transformer.layers.4.2.to_out.0.weight", "transformer.layers.4.4.ff.0.0.weight", "transformer.layers.4.4.ff.2.weight", "transformer.layers.5.2.to_q.weight", "transformer.layers.5.2.to_k.weight", "transformer.layers.5.2.to_v.weight", "transformer.layers.5.2.to_out.0.weight", "transformer.layers.5.4.ff.0.0.weight", "transformer.layers.5.4.ff.2.weight", "transformer.layers.6.2.to_q.weight", "transformer.layers.6.2.to_k.weight", "transformer.layers.6.2.to_v.weight", "transformer.layers.6.2.to_out.0.weight", "transformer.layers.6.4.ff.0.0.weight", "transformer.layers.6.4.ff.2.weight", "transformer.layers.7.2.to_q.weight", "transformer.layers.7.2.to_k.weight", "transformer.layers.7.2.to_v.weight", "transformer.layers.7.2.to_out.0.weight", "transformer.layers.7.4.ff.0.0.weight", "transformer.layers.7.4.ff.2.weight", "transformer.layers.8.4.ff.0.0.weight", "transformer.layers.8.4.ff.2.weight", "transformer.layers.9.4.ff.0.0.weight", "transformer.layers.9.4.ff.2.weight", "transformer.layers.10.4.ff.0.0.weight", "transformer.layers.10.4.ff.2.weight", "transformer.layers.11.4.ff.0.0.weight", "transformer.layers.11.4.ff.2.weight", "transformer.layers.12.4.ff.0.0.weight", "transformer.layers.12.4.ff.2.weight", "transformer.layers.13.0.weight", "transformer.layers.13.4.ff.0.0.weight", "transformer.layers.13.4.ff.2.weight", "transformer.layers.14.0.weight", "transformer.layers.14.4.ff.0.0.weight", "transformer.layers.14.4.ff.2.weight", "transformer.layers.15.0.weight", "transformer.layers.15.4.ff.0.0.weight", "transformer.layers.15.4.ff.2.weight", "transformer.layers.16.4.ff.0.0.weight", "transformer.layers.16.4.ff.2.weight", "transformer.layers.17.4.ff.0.0.weight", "transformer.layers.17.4.ff.2.weight", "transformer.layers.18.4.ff.0.0.weight", "transformer.layers.18.4.ff.2.weight", "transformer.layers.19.4.ff.0.0.weight", "transformer.layers.19.4.ff.2.weight", "transformer.layers.20.4.ff.0.0.weight", "transformer.layers.20.4.ff.2.weight", "transformer.layers.21.4.ff.0.0.weight", "transformer.layers.21.4.ff.2.weight", "transformer.layers.22.4.ff.0.0.weight", "transformer.layers.22.4.ff.2.weight", "transformer.layers.23.4.ff.0.0.weight", "transformer.layers.23.4.ff.2.weight", "transformer.layers.24.4.ff.0.0.weight", "transformer.layers.24.4.ff.2.weight", "transformer.layers.25.4.ff.0.0.weight", "transformer.layers.25.4.ff.2.weight" ] }, "total_parameters": 391, "total_elements": 360755948, "param_ranges": { "transformer.time_embed.time_mlp.0.weight": { "min": -0.43005406856536865, "max": 0.29851898550987244, "mean": -0.0025509949773550034, "std": 0.042555101215839386, "sparsity": 0.0, "shape": [ 1024, 256 ] }, "transformer.time_embed.time_mlp.0.bias": { "min": -0.06313250213861465, "max": 0.10729768127202988, "mean": 0.0006133262650109828, "std": 0.03408696502447128, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.time_embed.time_mlp.2.weight": { "min": -0.41268208622932434, "max": 0.8365541696548462, "mean": -0.00020702443725895137, "std": 0.02410811372101307, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.time_embed.time_mlp.2.bias": { "min": -0.11502047628164291, "max": 0.3207014203071594, "mean": -0.00093841488705948, "std": 0.019534854218363762, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.text_embed.text_embed.weight": { "min": -2.7852821350097656, "max": 2.8634164333343506, "mean": -0.00036539402208290994, "std": 0.615379810333252, "sparsity": 0.0, "shape": [ 2546, 100 ] }, "transformer.input_embed.proj.weight": { "min": -0.27854230999946594, "max": 0.38152772188186646, "mean": 0.0004230512131471187, "std": 0.042748332023620605, "sparsity": 0.0, "shape": [ 1024, 300 ] }, "transformer.input_embed.proj.bias": { "min": -0.22163018584251404, "max": 0.20894938707351685, "mean": -0.004489985294640064, "std": 0.040880318731069565, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.input_embed.conv_pos_embed.conv1d.0.weight": { "min": -0.4279509484767914, "max": 0.47543206810951233, "mean": 3.1694014523964142e-06, "std": 0.02450772561132908, "sparsity": 0.0, "shape": [ 1024, 64, 31 ] }, "transformer.input_embed.conv_pos_embed.conv1d.0.bias": { "min": -0.32420721650123596, "max": 0.15700779855251312, "mean": -0.04670684412121773, "std": 0.051544804126024246, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.input_embed.conv_pos_embed.conv1d.2.weight": { "min": -0.4101617932319641, "max": 0.3544142544269562, "mean": -0.00012779857206624, "std": 0.02359919063746929, "sparsity": 0.0, "shape": [ 1024, 64, 31 ] }, "transformer.input_embed.conv_pos_embed.conv1d.2.bias": { "min": -0.2289954274892807, "max": 0.26173391938209534, "mean": -0.029131349176168442, "std": 0.04930002987384796, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.0.1.g": { "min": 0.25456100702285767, "max": 0.818419873714447, "mean": 0.5253804922103882, "std": 0.08069705218076706, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.0.2.to_q.weight": { "min": -0.2965428829193115, "max": 0.26520034670829773, "mean": -0.00042467008461244404, "std": 0.03210080415010452, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.0.2.to_q.bias": { "min": -0.09260489046573639, "max": 0.1250484734773636, "mean": 0.0006493350956588984, "std": 0.025727085769176483, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.0.2.to_k.weight": { "min": -0.2901724576950073, "max": 0.281167596578598, "mean": -7.525501860072836e-05, "std": 0.030932163819670677, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.0.2.to_k.bias": { "min": -5.8939008712768555, "max": 5.80875825881958, "mean": -0.009307368658483028, "std": 1.2948225736618042, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.0.2.to_v.weight": { "min": -0.4246821701526642, "max": 0.34353208541870117, "mean": 9.80871482170187e-05, "std": 0.029952067881822586, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.0.2.to_v.bias": { "min": -0.02886669710278511, "max": 0.027609167620539665, "mean": -0.0003159984771627933, "std": 0.01256631314754486, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.0.2.to_out.0.weight": { "min": -0.4538891911506653, "max": 0.4482215344905853, "mean": 2.2922111384104937e-05, "std": 0.02385348081588745, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.0.2.to_out.0.bias": { "min": -0.08867117762565613, "max": 0.09104129672050476, "mean": 0.0022725451271981, "std": 0.019507737830281258, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.0.3.g": { "min": 0.26674631237983704, "max": 1.054079532623291, "mean": 0.5310790538787842, "std": 0.10425138473510742, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.0.4.ff.0.0.weight": { "min": -0.5743944644927979, "max": 0.6082407832145691, "mean": -0.00042930786730721593, "std": 0.03859541565179825, "sparsity": 0.0, "shape": [ 4096, 1024 ] }, "transformer.layers.0.4.ff.0.0.bias": { "min": -0.18188051879405975, "max": 0.04570186883211136, "mean": -0.029450394213199615, "std": 0.04259800165891647, "sparsity": 0.0, "shape": [ 4096 ] }, "transformer.layers.0.4.ff.2.weight": { "min": -1.1662050485610962, "max": 1.6339434385299683, "mean": 0.00032052083406597376, "std": 0.027692945674061775, "sparsity": 0.0, "shape": [ 1024, 4096 ] }, "transformer.layers.0.4.ff.2.bias": { "min": -0.16221286356449127, "max": 0.2055274099111557, "mean": -0.021118517965078354, "std": 0.027932317927479744, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.1.1.g": { "min": 0.22425268590450287, "max": 0.8419703841209412, "mean": 0.48751628398895264, "std": 0.0750974491238594, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.1.2.to_q.weight": { "min": -0.2551511526107788, "max": 0.30577754974365234, "mean": -8.399176294915378e-06, "std": 0.03346917778253555, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.1.2.to_q.bias": { "min": -0.09521990269422531, "max": 0.11036473512649536, "mean": 6.435990508180112e-05, "std": 0.026954451575875282, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.1.2.to_k.weight": { "min": -0.2969436049461365, "max": 0.29559123516082764, "mean": 5.0998860388062894e-05, "std": 0.032539013773202896, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.1.2.to_k.bias": { "min": -5.159433841705322, "max": 5.079733371734619, "mean": -0.014565235003829002, "std": 1.156693696975708, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.1.2.to_v.weight": { "min": -0.3445141315460205, "max": 0.3432990610599518, "mean": 7.890153938205913e-05, "std": 0.03005831316113472, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.1.2.to_v.bias": { "min": -0.03612125664949417, "max": 0.03314004838466644, "mean": -0.00014305136573966593, "std": 0.013020108453929424, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.1.2.to_out.0.weight": { "min": -0.3150654435157776, "max": 0.3748987019062042, "mean": -2.0872395907645114e-05, "std": 0.02405514195561409, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.1.2.to_out.0.bias": { "min": -0.10531895607709885, "max": 0.12192098051309586, "mean": -0.0019657753873616457, "std": 0.028842739760875702, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.1.3.g": { "min": 0.3119339942932129, "max": 1.1190955638885498, "mean": 0.6662184000015259, "std": 0.09769617766141891, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.1.4.ff.0.0.weight": { "min": -0.8722184300422668, "max": 0.6274752616882324, "mean": 0.0016759471036493778, "std": 0.047436658293008804, "sparsity": 0.0, "shape": [ 4096, 1024 ] }, "transformer.layers.1.4.ff.0.0.bias": { "min": -0.27076128125190735, "max": 0.034267961978912354, "mean": -0.046592649072408676, "std": 0.040578801184892654, "sparsity": 0.0, "shape": [ 4096 ] }, "transformer.layers.1.4.ff.2.weight": { "min": -0.9206072688102722, "max": 0.96403568983078, "mean": 0.0010221146512776613, "std": 0.040701672434806824, "sparsity": 0.0, "shape": [ 1024, 4096 ] }, "transformer.layers.1.4.ff.2.bias": { "min": -0.14442752301692963, "max": 0.0748896598815918, "mean": -0.009088763035833836, "std": 0.02569626271724701, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.2.1.g": { "min": 0.23972344398498535, "max": 0.7111932635307312, "mean": 0.44715946912765503, "std": 0.05921364948153496, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.2.2.to_q.weight": { "min": -0.27250099182128906, "max": 0.297283798456192, "mean": 8.777939001447521e-06, "std": 0.03547067567706108, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.2.2.to_q.bias": { "min": -0.11882907897233963, "max": 0.1182771623134613, "mean": 0.0007498766062781215, "std": 0.027608048170804977, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.2.2.to_k.weight": { "min": -0.2806638181209564, "max": 0.27924486994743347, "mean": -7.666053716093302e-05, "std": 0.03510000556707382, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.2.2.to_k.bias": { "min": -2.5072221755981445, "max": 2.5192060470581055, "mean": 0.026715079322457314, "std": 0.586592435836792, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.2.2.to_v.weight": { "min": -0.22091323137283325, "max": 0.2714807987213135, "mean": 2.762420081126038e-06, "std": 0.030731365084648132, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.2.2.to_v.bias": { "min": -0.03329985961318016, "max": 0.031178824603557587, "mean": 0.00011736361193470657, "std": 0.012398799881339073, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.2.2.to_out.0.weight": { "min": -0.2350921630859375, "max": 0.23149597644805908, "mean": 5.688454257324338e-05, "std": 0.025696979835629463, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.2.2.to_out.0.bias": { "min": -0.13562175631523132, "max": 0.1278066188097, "mean": -0.00549966748803854, "std": 0.039964329451322556, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.2.3.g": { "min": 0.3545263111591339, "max": 1.1705567836761475, "mean": 0.7105071544647217, "std": 0.10373809188604355, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.2.4.ff.0.0.weight": { "min": -0.6171801686286926, "max": 0.5549061298370361, "mean": 0.0011606733314692974, "std": 0.04611368104815483, "sparsity": 0.0, "shape": [ 4096, 1024 ] }, "transformer.layers.2.4.ff.0.0.bias": { "min": -0.1888936311006546, "max": 0.024856731295585632, "mean": -0.034840360283851624, "std": 0.028601042926311493, "sparsity": 0.0, "shape": [ 4096 ] }, "transformer.layers.2.4.ff.2.weight": { "min": -1.1303929090499878, "max": 0.9700294137001038, "mean": 0.00035928928991779685, "std": 0.04234178736805916, "sparsity": 0.0, "shape": [ 1024, 4096 ] }, "transformer.layers.2.4.ff.2.bias": { "min": -0.5973078012466431, "max": 0.06291170418262482, "mean": -0.004878643434494734, "std": 0.028604039922356606, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.3.1.g": { "min": 0.3753509521484375, "max": 0.9391864538192749, "mean": 0.5924164056777954, "std": 0.06680406630039215, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.3.2.to_q.weight": { "min": -0.391277939081192, "max": 0.36899876594543457, "mean": 7.035685848677531e-05, "std": 0.03718537837266922, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.3.2.to_q.bias": { "min": -0.11886083334684372, "max": 0.1363811194896698, "mean": 0.0009265001863241196, "std": 0.029201578348875046, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.3.2.to_k.weight": { "min": -0.6185654401779175, "max": 0.5083082914352417, "mean": 1.5324059859267436e-05, "std": 0.0364382304251194, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.3.2.to_k.bias": { "min": -8.179115295410156, "max": 8.780653953552246, "mean": -0.10920821875333786, "std": 1.697803258895874, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.3.2.to_v.weight": { "min": -0.27624833583831787, "max": 0.23940874636173248, "mean": 5.239578240434639e-05, "std": 0.0326123982667923, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.3.2.to_v.bias": { "min": -0.05171733349561691, "max": 0.039454903453588486, "mean": 9.008367487695068e-05, "std": 0.012963240966200829, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.3.2.to_out.0.weight": { "min": -0.2306506633758545, "max": 0.23440538346767426, "mean": -2.216407301602885e-05, "std": 0.02938910946249962, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.3.2.to_out.0.bias": { "min": -0.2041204422712326, "max": 0.1051875501871109, "mean": -0.004020026419311762, "std": 0.03262867406010628, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.3.3.g": { "min": 0.3396590054035187, "max": 1.0105489492416382, "mean": 0.7007004022598267, "std": 0.0967300534248352, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.3.4.ff.0.0.weight": { "min": -0.5642524361610413, "max": 0.8327149152755737, "mean": 0.0004152198671363294, "std": 0.04229423776268959, "sparsity": 0.0, "shape": [ 4096, 1024 ] }, "transformer.layers.3.4.ff.0.0.bias": { "min": -0.21180973947048187, "max": 0.030382230877876282, "mean": -0.032180484384298325, "std": 0.02649112045764923, "sparsity": 0.0, "shape": [ 4096 ] }, "transformer.layers.3.4.ff.2.weight": { "min": -0.7539102435112, "max": 0.7183676958084106, "mean": -1.6375699487980455e-05, "std": 0.03683510050177574, "sparsity": 0.0, "shape": [ 1024, 4096 ] }, "transformer.layers.3.4.ff.2.bias": { "min": -0.26317542791366577, "max": 0.10612691938877106, "mean": -0.003012202214449644, "std": 0.028860073536634445, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.4.1.g": { "min": 0.28410062193870544, "max": 0.6937515735626221, "mean": 0.49938827753067017, "std": 0.04646085575222969, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.4.2.to_q.weight": { "min": -0.27815356850624084, "max": 0.233821839094162, "mean": -0.00011090396583313122, "std": 0.03875657916069031, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.4.2.to_q.bias": { "min": -0.15374495089054108, "max": 0.126325324177742, "mean": -0.0022300099954009056, "std": 0.033342309296131134, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.4.2.to_k.weight": { "min": -0.4138854146003723, "max": 0.6591927409172058, "mean": -1.8888074919232167e-05, "std": 0.03909528627991676, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.4.2.to_k.bias": { "min": -4.2339067459106445, "max": 4.718007564544678, "mean": -0.020461430773139, "std": 1.007363200187683, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.4.2.to_v.weight": { "min": -0.2449360489845276, "max": 0.207246333360672, "mean": 4.3898020521737635e-05, "std": 0.033962249755859375, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.4.2.to_v.bias": { "min": -0.03454353287816048, "max": 0.04481153190135956, "mean": -1.8621416529640555e-05, "std": 0.01263485848903656, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.4.2.to_out.0.weight": { "min": -0.20073898136615753, "max": 0.20600160956382751, "mean": -2.920800579886418e-05, "std": 0.0310201458632946, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.4.2.to_out.0.bias": { "min": -0.1997092068195343, "max": 0.11323567479848862, "mean": -0.002894954290241003, "std": 0.0345144160091877, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.4.3.g": { "min": 0.36691704392433167, "max": 1.0552048683166504, "mean": 0.670504629611969, "std": 0.06634049117565155, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.4.4.ff.0.0.weight": { "min": -0.39792558550834656, "max": 0.5017094612121582, "mean": -3.8320780731737614e-05, "std": 0.04113030061125755, "sparsity": 0.0, "shape": [ 4096, 1024 ] }, "transformer.layers.4.4.ff.0.0.bias": { "min": -0.12866847217082977, "max": 0.026868799701333046, "mean": -0.030530910938978195, "std": 0.02187257632613182, "sparsity": 0.0, "shape": [ 4096 ] }, "transformer.layers.4.4.ff.2.weight": { "min": -0.4486997127532959, "max": 0.4325278401374817, "mean": 7.570705201942474e-05, "std": 0.03489042818546295, "sparsity": 0.0, "shape": [ 1024, 4096 ] }, "transformer.layers.4.4.ff.2.bias": { "min": -0.26739102602005005, "max": 0.07290376722812653, "mean": -0.001090540667064488, "std": 0.023126306012272835, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.5.1.g": { "min": 0.28740835189819336, "max": 0.6838006973266602, "mean": 0.5244842767715454, "std": 0.04748576506972313, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.5.2.to_q.weight": { "min": -0.22222448885440826, "max": 0.22337274253368378, "mean": 1.5597350284224376e-05, "std": 0.038948558270931244, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.5.2.to_q.bias": { "min": -0.1362549066543579, "max": 0.1092236116528511, "mean": 0.00024021141871344298, "std": 0.029209597036242485, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.5.2.to_k.weight": { "min": -0.37488552927970886, "max": 0.43708565831184387, "mean": -9.820145351113752e-06, "std": 0.039285808801651, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.5.2.to_k.bias": { "min": -3.8422415256500244, "max": 4.994611740112305, "mean": 0.009733816608786583, "std": 0.8449002504348755, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.5.2.to_v.weight": { "min": -0.22278591990470886, "max": 0.21995313465595245, "mean": -2.4143082555383444e-07, "std": 0.03440921753644943, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.5.2.to_v.bias": { "min": -0.04355766996741295, "max": 0.03580183535814285, "mean": -0.0002584094472695142, "std": 0.012078197672963142, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.5.2.to_out.0.weight": { "min": -0.21266809105873108, "max": 0.18842695653438568, "mean": -1.707848787191324e-05, "std": 0.03153562918305397, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.5.2.to_out.0.bias": { "min": -0.18067854642868042, "max": 0.12067519873380661, "mean": -0.0023923253174871206, "std": 0.04126231372356415, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.5.3.g": { "min": 0.42283520102500916, "max": 0.9399095773696899, "mean": 0.6626414060592651, "std": 0.056763265281915665, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.5.4.ff.0.0.weight": { "min": -0.37058448791503906, "max": 0.4756770133972168, "mean": -8.219464507419616e-05, "std": 0.040889278054237366, "sparsity": 0.0, "shape": [ 4096, 1024 ] }, "transformer.layers.5.4.ff.0.0.bias": { "min": -0.20835021138191223, "max": 0.027245184406638145, "mean": -0.03023524209856987, "std": 0.02135040983557701, "sparsity": 0.0, "shape": [ 4096 ] }, "transformer.layers.5.4.ff.2.weight": { "min": -0.3404720425605774, "max": 0.7332155108451843, "mean": 8.202612661989406e-05, "std": 0.03476588428020477, "sparsity": 0.0, "shape": [ 1024, 4096 ] }, "transformer.layers.5.4.ff.2.bias": { "min": -0.2399250864982605, "max": 0.050362419337034225, "mean": -0.0011862949468195438, "std": 0.020457014441490173, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.6.1.g": { "min": 0.306090772151947, "max": 0.6522687077522278, "mean": 0.5250887274742126, "std": 0.0460890494287014, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.6.2.to_q.weight": { "min": -0.3040372133255005, "max": 0.21722179651260376, "mean": 7.015860319370404e-05, "std": 0.0394948311150074, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.6.2.to_q.bias": { "min": -0.14904865622520447, "max": 0.1309719830751419, "mean": 0.0003389039193280041, "std": 0.03043319098651409, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.6.2.to_k.weight": { "min": -0.2568168342113495, "max": 0.20181529223918915, "mean": 3.114001810899936e-05, "std": 0.039484698325395584, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.6.2.to_k.bias": { "min": -2.3340678215026855, "max": 2.373654365539551, "mean": -0.026232335716485977, "std": 0.4496069550514221, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.6.2.to_v.weight": { "min": -0.18832948803901672, "max": 0.2102191150188446, "mean": 3.7190951843513176e-05, "std": 0.03479335457086563, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.6.2.to_v.bias": { "min": -0.03177480027079582, "max": 0.03555988520383835, "mean": -0.00019898739992640913, "std": 0.012286651879549026, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.6.2.to_out.0.weight": { "min": -0.1882997751235962, "max": 0.16997897624969482, "mean": -6.833271618233994e-05, "std": 0.03217003867030144, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.6.2.to_out.0.bias": { "min": -0.13938407599925995, "max": 0.1373613476753235, "mean": -0.0025095485616475344, "std": 0.051287971436977386, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.6.3.g": { "min": 0.4670821726322174, "max": 0.9539185762405396, "mean": 0.6688235998153687, "std": 0.05267348513007164, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.6.4.ff.0.0.weight": { "min": -0.3240530490875244, "max": 0.30894580483436584, "mean": -9.802424756344408e-07, "std": 0.04094521328806877, "sparsity": 0.0, "shape": [ 4096, 1024 ] }, "transformer.layers.6.4.ff.0.0.bias": { "min": -0.12482603639364243, "max": 0.025560826063156128, "mean": -0.030691375955939293, "std": 0.01981331594288349, "sparsity": 0.0, "shape": [ 4096 ] }, "transformer.layers.6.4.ff.2.weight": { "min": -0.4391370117664337, "max": 0.4447336196899414, "mean": 9.505114576313645e-05, "std": 0.03511868044734001, "sparsity": 0.0, "shape": [ 1024, 4096 ] }, "transformer.layers.6.4.ff.2.bias": { "min": -0.22435998916625977, "max": 0.051745057106018066, "mean": -0.0011790611315518618, "std": 0.018466567620635033, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.7.1.g": { "min": 0.339127779006958, "max": 0.7379522323608398, "mean": 0.5586450695991516, "std": 0.041346412152051926, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.7.2.to_q.weight": { "min": -0.27276721596717834, "max": 0.2783542275428772, "mean": 2.0316545487730764e-05, "std": 0.04105677455663681, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.7.2.to_q.bias": { "min": -0.13677620887756348, "max": 0.13981792330741882, "mean": 0.0004895473830401897, "std": 0.026616644114255905, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.7.2.to_k.weight": { "min": -0.4901849925518036, "max": 0.3555382788181305, "mean": 8.898908708943054e-05, "std": 0.04069453105330467, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.7.2.to_k.bias": { "min": -2.2957122325897217, "max": 1.7441315650939941, "mean": -0.02107611857354641, "std": 0.5000779628753662, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.7.2.to_v.weight": { "min": -0.2175905406475067, "max": 0.19755098223686218, "mean": -4.055129102198407e-05, "std": 0.03423253819346428, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.7.2.to_v.bias": { "min": -0.041273877024650574, "max": 0.038862332701683044, "mean": -0.0001397906889906153, "std": 0.012886369600892067, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.7.2.to_out.0.weight": { "min": -0.17747005820274353, "max": 0.1828984022140503, "mean": 4.791315950569697e-05, "std": 0.03155587986111641, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.7.2.to_out.0.bias": { "min": -0.17983144521713257, "max": 0.1835365742444992, "mean": -0.0022142226807773113, "std": 0.054839469492435455, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.7.3.g": { "min": 0.4742608368396759, "max": 1.0234043598175049, "mean": 0.645187497138977, "std": 0.050187092274427414, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.7.4.ff.0.0.weight": { "min": -0.2714308202266693, "max": 0.3094487190246582, "mean": 0.00011228019138798118, "std": 0.04068155214190483, "sparsity": 0.0, "shape": [ 4096, 1024 ] }, "transformer.layers.7.4.ff.0.0.bias": { "min": -0.1052371934056282, "max": 0.026651456952095032, "mean": -0.029516855254769325, "std": 0.017926618456840515, "sparsity": 0.0, "shape": [ 4096 ] }, "transformer.layers.7.4.ff.2.weight": { "min": -0.33875298500061035, "max": 0.3289111852645874, "mean": 5.248367233434692e-05, "std": 0.03441265597939491, "sparsity": 0.0, "shape": [ 1024, 4096 ] }, "transformer.layers.7.4.ff.2.bias": { "min": -0.1814928501844406, "max": 0.04225185513496399, "mean": -0.0010585930431261659, "std": 0.017206743359565735, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.8.1.g": { "min": 0.325328528881073, "max": 0.6851887106895447, "mean": 0.5111891627311707, "std": 0.03689680993556976, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.8.2.to_q.weight": { "min": -0.2336086481809616, "max": 0.2251969277858734, "mean": -3.625164390541613e-05, "std": 0.039176031947135925, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.8.2.to_q.bias": { "min": -0.11540839821100235, "max": 0.13177232444286346, "mean": 0.00015377491945400834, "std": 0.029171116650104523, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.8.2.to_k.weight": { "min": -0.35232973098754883, "max": 0.2849805951118469, "mean": 6.946377197891707e-06, "std": 0.0392446406185627, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.8.2.to_k.bias": { "min": -4.128444194793701, "max": 3.5404324531555176, "mean": -0.011580632999539375, "std": 0.6822744011878967, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.8.2.to_v.weight": { "min": -0.21085655689239502, "max": 0.20925314724445343, "mean": 3.461689630057663e-05, "std": 0.03448476642370224, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.8.2.to_v.bias": { "min": -0.03582029417157173, "max": 0.0481770783662796, "mean": 0.000791961036156863, "std": 0.012865905649960041, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.8.2.to_out.0.weight": { "min": -0.2102348804473877, "max": 0.19295428693294525, "mean": -1.266141225642059e-06, "std": 0.03169584646821022, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.8.2.to_out.0.bias": { "min": -0.18637949228286743, "max": 0.17694726586341858, "mean": -0.0028348618652671576, "std": 0.058624111115932465, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.8.3.g": { "min": 0.47455769777297974, "max": 1.0399035215377808, "mean": 0.6513059735298157, "std": 0.049517374485731125, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.8.4.ff.0.0.weight": { "min": -0.2480839341878891, "max": 0.32886141538619995, "mean": 0.00018076057313010097, "std": 0.040569957345724106, "sparsity": 0.0, "shape": [ 4096, 1024 ] }, "transformer.layers.8.4.ff.0.0.bias": { "min": -0.12484849989414215, "max": 0.024815550073981285, "mean": -0.030500907450914383, "std": 0.01760847680270672, "sparsity": 0.0, "shape": [ 4096 ] }, "transformer.layers.8.4.ff.2.weight": { "min": -0.42022550106048584, "max": 0.4810453951358795, "mean": -1.3774351828033105e-06, "std": 0.03539680689573288, "sparsity": 0.0, "shape": [ 1024, 4096 ] }, "transformer.layers.8.4.ff.2.bias": { "min": -0.15139424800872803, "max": 0.04337864741683006, "mean": 4.9671380111249164e-05, "std": 0.014884358271956444, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.9.1.g": { "min": 0.3155671954154968, "max": 0.6806262135505676, "mean": 0.5528896450996399, "std": 0.04069091007113457, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.9.2.to_q.weight": { "min": -0.20614612102508545, "max": 0.2194698005914688, "mean": 3.180014027748257e-05, "std": 0.038299210369586945, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.9.2.to_q.bias": { "min": -0.13776730000972748, "max": 0.11263402551412582, "mean": 2.7509784558787942e-05, "std": 0.02582019381225109, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.9.2.to_k.weight": { "min": -0.4022030830383301, "max": 0.3703415095806122, "mean": 2.5775392714422196e-05, "std": 0.03817988187074661, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.9.2.to_k.bias": { "min": -3.767340898513794, "max": 2.8659963607788086, "mean": 0.0011514686048030853, "std": 0.5165835022926331, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.9.2.to_v.weight": { "min": -0.20330490171909332, "max": 0.1975128948688507, "mean": 2.9661892767762765e-05, "std": 0.03429696336388588, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.9.2.to_v.bias": { "min": -0.05067470669746399, "max": 0.03985888883471489, "mean": -0.0004201547708362341, "std": 0.013416973873972893, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.9.2.to_out.0.weight": { "min": -0.19610381126403809, "max": 0.20185545086860657, "mean": -1.2482038982852828e-05, "std": 0.031804922968149185, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.9.2.to_out.0.bias": { "min": -0.19282294809818268, "max": 0.19485345482826233, "mean": -0.0029612130019813776, "std": 0.06253436952829361, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.9.3.g": { "min": 0.3490590453147888, "max": 1.081492304801941, "mean": 0.6670613884925842, "std": 0.05502287670969963, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.9.4.ff.0.0.weight": { "min": -0.22548414766788483, "max": 0.2509278655052185, "mean": 0.00035874126479029655, "std": 0.04075963795185089, "sparsity": 0.0, "shape": [ 4096, 1024 ] }, "transformer.layers.9.4.ff.0.0.bias": { "min": -0.0911286398768425, "max": 0.043736688792705536, "mean": -0.03008149564266205, "std": 0.017609886825084686, "sparsity": 0.0, "shape": [ 4096 ] }, "transformer.layers.9.4.ff.2.weight": { "min": -0.3527411222457886, "max": 0.30355900526046753, "mean": -4.3905802158406004e-05, "std": 0.037122152745723724, "sparsity": 0.0, "shape": [ 1024, 4096 ] }, "transformer.layers.9.4.ff.2.bias": { "min": -0.16155573725700378, "max": 0.06323426961898804, "mean": -8.016945503186435e-05, "std": 0.019409824162721634, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.10.1.g": { "min": 0.34882256388664246, "max": 0.7205829620361328, "mean": 0.5423275232315063, "std": 0.03903055191040039, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.10.2.to_q.weight": { "min": -0.21910026669502258, "max": 0.2230084389448166, "mean": -1.1230863492528442e-05, "std": 0.03923042118549347, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.10.2.to_q.bias": { "min": -0.11831706017255783, "max": 0.17028944194316864, "mean": 0.0002854751655831933, "std": 0.02510806918144226, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.10.2.to_k.weight": { "min": -0.24612674117088318, "max": 0.3002479076385498, "mean": -3.693345206556842e-05, "std": 0.03892989829182625, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.10.2.to_k.bias": { "min": -3.501706838607788, "max": 3.7109532356262207, "mean": 0.015846284106373787, "std": 0.7818700075149536, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.10.2.to_v.weight": { "min": -0.2186352014541626, "max": 0.2372058928012848, "mean": -1.3363219295570161e-05, "std": 0.03630276769399643, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.10.2.to_v.bias": { "min": -0.04711708053946495, "max": 0.05125221982598305, "mean": 0.00047675782116129994, "std": 0.013513283804059029, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.10.2.to_out.0.weight": { "min": -0.2137574851512909, "max": 0.2170482724905014, "mean": 5.6474542361684144e-05, "std": 0.033615030348300934, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.10.2.to_out.0.bias": { "min": -0.21112386882305145, "max": 0.23111283779144287, "mean": -0.005101324524730444, "std": 0.06186835095286369, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.10.3.g": { "min": 0.36194419860839844, "max": 1.0987720489501953, "mean": 0.6991980671882629, "std": 0.05339714512228966, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.10.4.ff.0.0.weight": { "min": -0.23452329635620117, "max": 0.24459832906723022, "mean": 0.0004634420620277524, "std": 0.041268572211265564, "sparsity": 0.0, "shape": [ 4096, 1024 ] }, "transformer.layers.10.4.ff.0.0.bias": { "min": -0.09795372933149338, "max": 0.0681690126657486, "mean": -0.031430941075086594, "std": 0.018122123554348946, "sparsity": 0.0, "shape": [ 4096 ] }, "transformer.layers.10.4.ff.2.weight": { "min": -0.3014773726463318, "max": 0.3510685861110687, "mean": -8.210168743971735e-05, "std": 0.04027429223060608, "sparsity": 0.0, "shape": [ 1024, 4096 ] }, "transformer.layers.10.4.ff.2.bias": { "min": -0.15211886167526245, "max": 0.14952634274959564, "mean": 0.0002581052831374109, "std": 0.023030627518892288, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.11.1.g": { "min": 0.9992543458938599, "max": 1.000257968902588, "mean": 0.9997284412384033, "std": 0.00024261184444185346, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.11.2.to_q.weight": { "min": -0.031257662922143936, "max": 0.03125471994280815, "mean": -1.929123027366586e-05, "std": 0.018041206523776054, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.11.2.to_q.bias": { "min": -0.03122766688466072, "max": 0.030988017097115517, "mean": -0.0010841797338798642, "std": 0.01795079931616783, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.11.2.to_k.weight": { "min": -0.031254444271326065, "max": 0.031258873641490936, "mean": 3.5479256439430173e-06, "std": 0.018041614443063736, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.11.2.to_k.bias": { "min": -0.031154906377196312, "max": 0.03117496706545353, "mean": 0.0003339025133755058, "std": 0.018063001334667206, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.11.2.to_v.weight": { "min": 0.0, "max": 0.0, "mean": 0.0, "std": 0.0, "sparsity": 1.0, "shape": [ 1024, 1024 ] }, "transformer.layers.11.2.to_v.bias": { "min": 0.0, "max": 0.0, "mean": 0.0, "std": 0.0, "sparsity": 1.0, "shape": [ 1024 ] }, "transformer.layers.11.2.to_out.0.weight": { "min": 0.0, "max": 0.0, "mean": 0.0, "std": 0.0, "sparsity": 1.0, "shape": [ 1024, 1024 ] }, "transformer.layers.11.2.to_out.0.bias": { "min": -0.0006141028716228902, "max": 0.0004136512288823724, "mean": 1.3743268709731638e-06, "std": 0.0001376789587084204, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.11.3.g": { "min": 0.9981284141540527, "max": 1.001622200012207, "mean": 0.9998474717140198, "std": 0.0006079401355236769, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.11.4.ff.0.0.weight": { "min": -0.032770540565252304, "max": 0.032834719866514206, "mean": -6.686397682642564e-06, "std": 0.01804281771183014, "sparsity": 0.0, "shape": [ 4096, 1024 ] }, "transformer.layers.11.4.ff.0.0.bias": { "min": -0.032758843153715134, "max": 0.03259320184588432, "mean": -0.00013118298375047743, "std": 0.017956331372261047, "sparsity": 0.0, "shape": [ 4096 ] }, "transformer.layers.11.4.ff.2.weight": { "min": -0.001173654804006219, "max": 0.0011514672078192234, "mean": 3.6397079838934587e-07, "std": 0.00021431130880955607, "sparsity": 0.0, "shape": [ 1024, 4096 ] }, "transformer.layers.11.4.ff.2.bias": { "min": -0.0005246364744380116, "max": 0.000398451229557395, "mean": 2.265020839331555e-06, "std": 0.0001267467887373641, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.12.1.g": { "min": 0.38304001092910767, "max": 0.717822790145874, "mean": 0.5806512236595154, "std": 0.03879348561167717, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.12.2.to_q.weight": { "min": -0.2381902039051056, "max": 0.1962050199508667, "mean": 2.6112733394256793e-05, "std": 0.03746553510427475, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.12.2.to_q.bias": { "min": -0.11878937482833862, "max": 0.16630207002162933, "mean": 0.0009804379660636187, "std": 0.027551008388400078, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.12.2.to_k.weight": { "min": -0.24597673118114471, "max": 0.499647855758667, "mean": -5.027425504522398e-05, "std": 0.03762295842170715, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.12.2.to_k.bias": { "min": -3.9381461143493652, "max": 3.7654519081115723, "mean": -0.003569968044757843, "std": 0.6810594201087952, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.12.2.to_v.weight": { "min": -0.22724951803684235, "max": 0.25177428126335144, "mean": -1.1575086318771355e-05, "std": 0.037434518337249756, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.12.2.to_v.bias": { "min": -0.07160108536481857, "max": 0.08055920898914337, "mean": -0.0005123723531141877, "std": 0.015660181641578674, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.12.2.to_out.0.weight": { "min": -0.22791653871536255, "max": 0.25741860270500183, "mean": -2.8733527869917452e-05, "std": 0.035421404987573624, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.12.2.to_out.0.bias": { "min": -0.20038263499736786, "max": 0.21485595405101776, "mean": -0.005531632341444492, "std": 0.06833721697330475, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.12.3.g": { "min": 0.4051814377307892, "max": 1.186793327331543, "mean": 0.7378474473953247, "std": 0.055015575140714645, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.12.4.ff.0.0.weight": { "min": -0.2207704335451126, "max": 0.24539422988891602, "mean": 0.0005212163086980581, "std": 0.04133594036102295, "sparsity": 0.0, "shape": [ 4096, 1024 ] }, "transformer.layers.12.4.ff.0.0.bias": { "min": -0.10323301702737808, "max": 0.02423531748354435, "mean": -0.03266426920890808, "std": 0.018886635079979897, "sparsity": 0.0, "shape": [ 4096 ] }, "transformer.layers.12.4.ff.2.weight": { "min": -0.44897761940956116, "max": 0.42180517315864563, "mean": -0.0004341494059190154, "std": 0.04689624160528183, "sparsity": 0.0, "shape": [ 1024, 4096 ] }, "transformer.layers.12.4.ff.2.bias": { "min": -0.25117069482803345, "max": 0.46963006258010864, "mean": 0.003201500279828906, "std": 0.044517986476421356, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.13.0.weight": { "min": -0.3168058395385742, "max": 0.3330129086971283, "mean": -2.5202643882948905e-05, "std": 0.021287493407726288, "sparsity": 0.0, "shape": [ 1024, 2048 ] }, "transformer.layers.13.1.g": { "min": 0.32449325919151306, "max": 0.6839006543159485, "mean": 0.5709657073020935, "std": 0.04467146471142769, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.13.2.to_q.weight": { "min": -0.16424405574798584, "max": 0.1741371899843216, "mean": -4.883421570411883e-05, "std": 0.033180903643369675, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.13.2.to_q.bias": { "min": -0.18656986951828003, "max": 0.14275068044662476, "mean": 4.2517087422311306e-05, "std": 0.029676001518964767, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.13.2.to_k.weight": { "min": -0.3805179297924042, "max": 0.24586445093154907, "mean": -9.98385530692758e-06, "std": 0.03276193141937256, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.13.2.to_k.bias": { "min": -3.6520333290100098, "max": 3.2866697311401367, "mean": -0.01423930749297142, "std": 0.984977662563324, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.13.2.to_v.weight": { "min": -0.23466402292251587, "max": 0.24725867807865143, "mean": -1.800561039999593e-05, "std": 0.04169729724526405, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.13.2.to_v.bias": { "min": -0.07259472459554672, "max": 0.15434128046035767, "mean": 0.0006652789888903499, "std": 0.02516855113208294, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.13.2.to_out.0.weight": { "min": -0.2662595510482788, "max": 0.24813267588615417, "mean": -1.5347548469435424e-05, "std": 0.04013809189200401, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.13.2.to_out.0.bias": { "min": -0.18939754366874695, "max": 0.19454091787338257, "mean": -0.0012339097447693348, "std": 0.06667902320623398, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.13.3.g": { "min": 0.32912713289260864, "max": 0.9980567097663879, "mean": 0.7191190719604492, "std": 0.05222564935684204, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.13.4.ff.0.0.weight": { "min": -0.23154447972774506, "max": 0.2451959252357483, "mean": 0.00018269156862515956, "std": 0.04089995473623276, "sparsity": 0.0, "shape": [ 4096, 1024 ] }, "transformer.layers.13.4.ff.0.0.bias": { "min": -0.11424808949232101, "max": 0.01902252808213234, "mean": -0.04247482866048813, "std": 0.018848657608032227, "sparsity": 0.0, "shape": [ 4096 ] }, "transformer.layers.13.4.ff.2.weight": { "min": -0.3893679976463318, "max": 0.4069530963897705, "mean": -2.1458035917021334e-05, "std": 0.04853350669145584, "sparsity": 0.0, "shape": [ 1024, 4096 ] }, "transformer.layers.13.4.ff.2.bias": { "min": -0.6924692392349243, "max": 0.4121605455875397, "mean": 0.0008477990049868822, "std": 0.06026294827461243, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.14.0.weight": { "min": -0.0010412124684080482, "max": 1.00050687789917, "mean": 0.00048820613301359117, "std": 0.02208906039595604, "sparsity": 0.0, "shape": [ 1024, 2048 ] }, "transformer.layers.14.1.g": { "min": 0.9985182881355286, "max": 1.000278115272522, "mean": 0.9996296167373657, "std": 0.0004832371196243912, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.14.2.to_q.weight": { "min": -0.03125324100255966, "max": 0.03125615417957306, "mean": -2.1021265638410114e-05, "std": 0.01803254708647728, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.14.2.to_q.bias": { "min": -0.03121461719274521, "max": 0.031231539323925972, "mean": -0.0006769909523427486, "std": 0.017827048897743225, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.14.2.to_k.weight": { "min": -0.03125639632344246, "max": 0.031260956078767776, "mean": -8.831522791297175e-06, "std": 0.018031572923064232, "sparsity": 9.5367431640625e-07, "shape": [ 1024, 1024 ] }, "transformer.layers.14.2.to_k.bias": { "min": -0.03123198263347149, "max": 0.031244853511452675, "mean": -0.0007297562551684678, "std": 0.017941949889063835, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.14.2.to_v.weight": { "min": 0.0, "max": 0.0, "mean": 0.0, "std": 0.0, "sparsity": 1.0, "shape": [ 1024, 1024 ] }, "transformer.layers.14.2.to_v.bias": { "min": 0.0, "max": 0.0, "mean": 0.0, "std": 0.0, "sparsity": 1.0, "shape": [ 1024 ] }, "transformer.layers.14.2.to_out.0.weight": { "min": 0.0, "max": 0.0, "mean": 0.0, "std": 0.0, "sparsity": 1.0, "shape": [ 1024, 1024 ] }, "transformer.layers.14.2.to_out.0.bias": { "min": -0.0004176551883574575, "max": 0.0003318839881103486, "mean": -3.140859689665376e-06, "std": 0.00011632459791144356, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.14.3.g": { "min": 0.9979198575019836, "max": 1.0014318227767944, "mean": 0.9994964599609375, "std": 0.0006108160014264286, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.14.4.ff.0.0.weight": { "min": -0.03245294839143753, "max": 0.032378438860177994, "mean": -1.7318175196123775e-06, "std": 0.018028022721409798, "sparsity": 0.0, "shape": [ 4096, 1024 ] }, "transformer.layers.14.4.ff.0.0.bias": { "min": -0.03213566541671753, "max": 0.03115900792181492, "mean": -0.0003739359090104699, "std": 0.018043629825115204, "sparsity": 0.0, "shape": [ 4096 ] }, "transformer.layers.14.4.ff.2.weight": { "min": -0.0012771300971508026, "max": 0.0011123745935037732, "mean": -8.958944022197102e-07, "std": 0.00020973320351913571, "sparsity": 0.0, "shape": [ 1024, 4096 ] }, "transformer.layers.14.4.ff.2.bias": { "min": -0.00034164811950176954, "max": 0.0002967154432553798, "mean": -3.7618522128468612e-06, "std": 0.00010472961730556563, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.15.0.weight": { "min": -0.2341979742050171, "max": 0.27227067947387695, "mean": 6.760874839528697e-06, "std": 0.01880943961441517, "sparsity": 0.0, "shape": [ 1024, 2048 ] }, "transformer.layers.15.1.g": { "min": 0.32133588194847107, "max": 0.6926518678665161, "mean": 0.5816141963005066, "std": 0.04592034965753555, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.15.2.to_q.weight": { "min": -0.1816624104976654, "max": 0.19737666845321655, "mean": -1.1567326509975828e-05, "std": 0.03318365663290024, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.15.2.to_q.bias": { "min": -0.16045045852661133, "max": 0.12930794060230255, "mean": -0.0010751842055469751, "std": 0.03413202986121178, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.15.2.to_k.weight": { "min": -0.3320204019546509, "max": 0.31095007061958313, "mean": -1.016673104459187e-05, "std": 0.032234374433755875, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.15.2.to_k.bias": { "min": -7.795230388641357, "max": 8.753500938415527, "mean": 0.09339793026447296, "std": 1.6184653043746948, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.15.2.to_v.weight": { "min": -0.23359645903110504, "max": 0.2416210174560547, "mean": 4.149888991378248e-05, "std": 0.04085618630051613, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.15.2.to_v.bias": { "min": -0.07583926618099213, "max": 0.06566201150417328, "mean": 0.0004832554841414094, "std": 0.01940709352493286, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.15.2.to_out.0.weight": { "min": -0.24546822905540466, "max": 0.23373769223690033, "mean": -3.0527116905432194e-06, "std": 0.03943083807826042, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.15.2.to_out.0.bias": { "min": -0.16301113367080688, "max": 0.16089561581611633, "mean": 0.0016276519745588303, "std": 0.06527570635080338, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.15.3.g": { "min": 0.556946873664856, "max": 0.9415686726570129, "mean": 0.7127838134765625, "std": 0.03996752202510834, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.15.4.ff.0.0.weight": { "min": -0.22765818238258362, "max": 0.25477662682533264, "mean": -4.5632557885255665e-05, "std": 0.04057467356324196, "sparsity": 0.0, "shape": [ 4096, 1024 ] }, "transformer.layers.15.4.ff.0.0.bias": { "min": -0.1348292976617813, "max": 0.022138668224215508, "mean": -0.04134812578558922, "std": 0.01838543266057968, "sparsity": 0.0, "shape": [ 4096 ] }, "transformer.layers.15.4.ff.2.weight": { "min": -0.42094686627388, "max": 0.3921053111553192, "mean": -4.4014304876327515e-06, "std": 0.04778384044766426, "sparsity": 0.0, "shape": [ 1024, 4096 ] }, "transformer.layers.15.4.ff.2.bias": { "min": -0.6069029569625854, "max": 0.6509266495704651, "mean": 0.0015840512933209538, "std": 0.05682184174656868, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.16.0.weight": { "min": -0.25153595209121704, "max": 0.320549339056015, "mean": -6.0848738030472305e-06, "std": 0.019612807780504227, "sparsity": 0.0, "shape": [ 1024, 2048 ] }, "transformer.layers.16.1.g": { "min": 0.35961607098579407, "max": 0.6813214421272278, "mean": 0.570705771446228, "std": 0.04296967759728432, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.16.2.to_q.weight": { "min": -0.22012382745742798, "max": 0.17660681903362274, "mean": -3.47153763868846e-05, "std": 0.03429870679974556, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.16.2.to_q.bias": { "min": -0.1630830317735672, "max": 0.23280400037765503, "mean": 0.00036220261245034635, "std": 0.03281139209866524, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.16.2.to_k.weight": { "min": -0.263581246137619, "max": 0.23967352509498596, "mean": -5.2856208640150726e-05, "std": 0.03389754518866539, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.16.2.to_k.bias": { "min": -4.849710464477539, "max": 5.085712909698486, "mean": 0.043873172253370285, "std": 1.2286995649337769, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.16.2.to_v.weight": { "min": -0.24600939452648163, "max": 0.25006523728370667, "mean": 7.234106305986643e-05, "std": 0.04398686811327934, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.16.2.to_v.bias": { "min": -0.06254445016384125, "max": 0.054417435079813004, "mean": 0.0006422345177270472, "std": 0.017186632379889488, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.16.2.to_out.0.weight": { "min": -0.28586557507514954, "max": 0.2718929648399353, "mean": -5.018173033022322e-05, "std": 0.0429849736392498, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.16.2.to_out.0.bias": { "min": -0.1608622968196869, "max": 0.17021305859088898, "mean": -0.0028866538777947426, "std": 0.05928993597626686, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.16.3.g": { "min": 0.519731879234314, "max": 0.9308202266693115, "mean": 0.7133743166923523, "std": 0.03828318044543266, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.16.4.ff.0.0.weight": { "min": -0.23790688812732697, "max": 0.24848711490631104, "mean": 0.00046475647832266986, "std": 0.04045366868376732, "sparsity": 0.0, "shape": [ 4096, 1024 ] }, "transformer.layers.16.4.ff.0.0.bias": { "min": -0.14495447278022766, "max": 0.04111183062195778, "mean": -0.039693139493465424, "std": 0.020540453493595123, "sparsity": 0.0, "shape": [ 4096 ] }, "transformer.layers.16.4.ff.2.weight": { "min": -0.5317410826683044, "max": 0.581489622592926, "mean": 5.736372258979827e-06, "std": 0.04885946586728096, "sparsity": 0.0, "shape": [ 1024, 4096 ] }, "transformer.layers.16.4.ff.2.bias": { "min": -0.5184876322746277, "max": 0.4928899109363556, "mean": 0.002365314168855548, "std": 0.05342720076441765, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.17.0.weight": { "min": -0.27367857098579407, "max": 0.3154536187648773, "mean": 2.0265892999304924e-06, "std": 0.020049458369612694, "sparsity": 0.0, "shape": [ 1024, 2048 ] }, "transformer.layers.17.1.g": { "min": 0.36605367064476013, "max": 0.7104601860046387, "mean": 0.5931398272514343, "std": 0.04595194756984711, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.17.2.to_q.weight": { "min": -0.21068720519542694, "max": 0.19896060228347778, "mean": 3.061807728954591e-05, "std": 0.03486604616045952, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.17.2.to_q.bias": { "min": -0.18698948621749878, "max": 0.20358456671237946, "mean": 0.0009543596534058452, "std": 0.03149386867880821, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.17.2.to_k.weight": { "min": -0.2894982397556305, "max": 0.339619904756546, "mean": -4.7122804971877486e-05, "std": 0.034586917608976364, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.17.2.to_k.bias": { "min": -3.8732118606567383, "max": 3.3837733268737793, "mean": 0.014458216726779938, "std": 0.8580982089042664, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.17.2.to_v.weight": { "min": -0.224315345287323, "max": 0.24964982271194458, "mean": -3.871130957122659e-06, "std": 0.042229585349559784, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.17.2.to_v.bias": { "min": -0.055275678634643555, "max": 0.04663092643022537, "mean": -1.647317549213767e-05, "std": 0.015846259891986847, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.17.2.to_out.0.weight": { "min": -0.2928326427936554, "max": 0.29024964570999146, "mean": -7.346136044361629e-06, "std": 0.04194441810250282, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.17.2.to_out.0.bias": { "min": -0.12488731741905212, "max": 0.2587108016014099, "mean": -0.0032421478535979986, "std": 0.05317580699920654, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.17.3.g": { "min": 0.4563259780406952, "max": 0.8424069881439209, "mean": 0.7054323554039001, "std": 0.03509839251637459, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.17.4.ff.0.0.weight": { "min": -0.5117396712303162, "max": 0.34794938564300537, "mean": 0.00034281908301636577, "std": 0.04019879177212715, "sparsity": 0.0, "shape": [ 4096, 1024 ] }, "transformer.layers.17.4.ff.0.0.bias": { "min": -0.1857415735721588, "max": 0.03958635777235031, "mean": -0.03938839212059975, "std": 0.021348465234041214, "sparsity": 0.0, "shape": [ 4096 ] }, "transformer.layers.17.4.ff.2.weight": { "min": -0.5434486865997314, "max": 0.5551662445068359, "mean": -7.160313543863595e-05, "std": 0.050734180957078934, "sparsity": 0.0, "shape": [ 1024, 4096 ] }, "transformer.layers.17.4.ff.2.bias": { "min": -0.5112110376358032, "max": 0.6635048389434814, "mean": 0.002443352248519659, "std": 0.04949941858649254, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.18.0.weight": { "min": -0.3325079083442688, "max": 0.2651371359825134, "mean": 3.4327572393522132e-06, "std": 0.019386671483516693, "sparsity": 0.0, "shape": [ 1024, 2048 ] }, "transformer.layers.18.1.g": { "min": 0.3219457268714905, "max": 0.7650159597396851, "mean": 0.6510248780250549, "std": 0.04531543329358101, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.18.2.to_q.weight": { "min": -0.24919819831848145, "max": 0.21938340365886688, "mean": -2.0984125512768514e-06, "std": 0.03650059178471565, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.18.2.to_q.bias": { "min": -0.32654333114624023, "max": 0.2866538465023041, "mean": -0.0006891752709634602, "std": 0.03852362558245659, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.18.2.to_k.weight": { "min": -0.30977994203567505, "max": 0.36965611577033997, "mean": 6.506919453386217e-05, "std": 0.03624110668897629, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.18.2.to_k.bias": { "min": -4.713971138000488, "max": 5.803556442260742, "mean": 0.03793709725141525, "std": 1.412732481956482, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.18.2.to_v.weight": { "min": -0.22124992311000824, "max": 0.20528917014598846, "mean": -7.50878534745425e-05, "std": 0.042485084384679794, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.18.2.to_v.bias": { "min": -0.07763200253248215, "max": 0.05141681060194969, "mean": -0.0009281833190470934, "std": 0.01641252264380455, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.18.2.to_out.0.weight": { "min": -0.33066344261169434, "max": 0.32909321784973145, "mean": -4.5878937271481846e-06, "std": 0.04279147461056709, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.18.2.to_out.0.bias": { "min": -0.2844299376010895, "max": 0.1119050681591034, "mean": -0.001205054228194058, "std": 0.0470142662525177, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.18.3.g": { "min": 0.48612144589424133, "max": 0.8848820328712463, "mean": 0.7373377084732056, "std": 0.03814017400145531, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.18.4.ff.0.0.weight": { "min": -0.36209484934806824, "max": 0.2740732431411743, "mean": 5.125169991515577e-05, "std": 0.04064430668950081, "sparsity": 0.0, "shape": [ 4096, 1024 ] }, "transformer.layers.18.4.ff.0.0.bias": { "min": -0.2473653107881546, "max": 0.046401649713516235, "mean": -0.03926541656255722, "std": 0.02327280305325985, "sparsity": 0.0, "shape": [ 4096 ] }, "transformer.layers.18.4.ff.2.weight": { "min": -0.6253157258033752, "max": 0.5961773991584778, "mean": -6.133734132163227e-05, "std": 0.0531163364648819, "sparsity": 0.0, "shape": [ 1024, 4096 ] }, "transformer.layers.18.4.ff.2.bias": { "min": -0.7087676525115967, "max": 0.2656005322933197, "mean": 0.0009179539047181606, "std": 0.05120791867375374, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.19.0.weight": { "min": -0.3432927131652832, "max": 0.3036082684993744, "mean": 1.7233912785741268e-07, "std": 0.01913507841527462, "sparsity": 0.0, "shape": [ 1024, 2048 ] }, "transformer.layers.19.1.g": { "min": 0.34983396530151367, "max": 0.78127521276474, "mean": 0.6388033628463745, "std": 0.04922258108854294, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.19.2.to_q.weight": { "min": -0.20482076704502106, "max": 0.20643775165081024, "mean": -5.993415470584296e-05, "std": 0.037695497274398804, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.19.2.to_q.bias": { "min": -0.2582729458808899, "max": 0.2677401304244995, "mean": -0.0004000938788522035, "std": 0.04457787051796913, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.19.2.to_k.weight": { "min": -0.3535555303096771, "max": 0.3218846917152405, "mean": -7.005222414591117e-06, "std": 0.03720390424132347, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.19.2.to_k.bias": { "min": -5.2560133934021, "max": 4.200046062469482, "mean": -0.026399940252304077, "std": 1.0062882900238037, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.19.2.to_v.weight": { "min": -0.2381831258535385, "max": 0.24307270348072052, "mean": -2.52762038144283e-05, "std": 0.0432097353041172, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.19.2.to_v.bias": { "min": -0.0622570626437664, "max": 0.05666593089699745, "mean": 0.0003454152902122587, "std": 0.014151728712022305, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.19.2.to_out.0.weight": { "min": -0.43709275126457214, "max": 0.37350907921791077, "mean": 1.4359582564793527e-05, "std": 0.04412123188376427, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.19.2.to_out.0.bias": { "min": -0.09637399762868881, "max": 0.17579396069049835, "mean": -0.00066028768196702, "std": 0.035156894475221634, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.19.3.g": { "min": 0.4216686189174652, "max": 1.067047357559204, "mean": 0.7483223080635071, "std": 0.04198553413152695, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.19.4.ff.0.0.weight": { "min": -0.26631179451942444, "max": 0.2965000867843628, "mean": -7.944944081827998e-05, "std": 0.040804266929626465, "sparsity": 0.0, "shape": [ 4096, 1024 ] }, "transformer.layers.19.4.ff.0.0.bias": { "min": -0.1849687099456787, "max": 0.04366198182106018, "mean": -0.03681465983390808, "std": 0.025593994185328484, "sparsity": 0.0, "shape": [ 4096 ] }, "transformer.layers.19.4.ff.2.weight": { "min": -0.4571255147457123, "max": 0.4859236776828766, "mean": 4.341108797234483e-05, "std": 0.05420951172709465, "sparsity": 0.0, "shape": [ 1024, 4096 ] }, "transformer.layers.19.4.ff.2.bias": { "min": -0.28613921999931335, "max": 0.5508683919906616, "mean": -0.0008792161825112998, "std": 0.04781510680913925, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.20.0.weight": { "min": -0.2926841676235199, "max": 0.3227182626724243, "mean": 6.155195478640962e-06, "std": 0.019968634471297264, "sparsity": 0.0, "shape": [ 1024, 2048 ] }, "transformer.layers.20.1.g": { "min": 0.29101473093032837, "max": 0.7585480213165283, "mean": 0.6508181095123291, "std": 0.05212597921490669, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.20.2.to_q.weight": { "min": -0.24345757067203522, "max": 0.2612913250923157, "mean": -6.02660793447285e-06, "std": 0.03961166366934776, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.20.2.to_q.bias": { "min": -0.2671639025211334, "max": 0.19983193278312683, "mean": -0.0008803074015304446, "std": 0.05174032971262932, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.20.2.to_k.weight": { "min": -0.2718494236469269, "max": 0.25337839126586914, "mean": 4.495690518524498e-06, "std": 0.0387086495757103, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.20.2.to_k.bias": { "min": -12.951557159423828, "max": 15.930760383605957, "mean": 0.03321323171257973, "std": 1.9877210855484009, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.20.2.to_v.weight": { "min": -0.2069142907857895, "max": 0.225667342543602, "mean": -7.223337888717651e-05, "std": 0.04055356606841087, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.20.2.to_v.bias": { "min": -0.06923694908618927, "max": 0.06314270943403244, "mean": 0.00015547810471616685, "std": 0.0147401699796319, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.20.2.to_out.0.weight": { "min": -0.4649372100830078, "max": 0.3204408884048462, "mean": 1.968499054783024e-05, "std": 0.04058866575360298, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.20.2.to_out.0.bias": { "min": -0.06409196555614471, "max": 0.11513285338878632, "mean": 0.0011910968460142612, "std": 0.024711282923817635, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.20.3.g": { "min": 0.374662309885025, "max": 0.9300851821899414, "mean": 0.7508615255355835, "std": 0.04013195261359215, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.20.4.ff.0.0.weight": { "min": -0.2791317403316498, "max": 0.2725660502910614, "mean": -0.00016837481234688312, "std": 0.040994856506586075, "sparsity": 0.0, "shape": [ 4096, 1024 ] }, "transformer.layers.20.4.ff.0.0.bias": { "min": -0.1984652727842331, "max": 0.05115879327058792, "mean": -0.03202404826879501, "std": 0.02509358339011669, "sparsity": 0.0, "shape": [ 4096 ] }, "transformer.layers.20.4.ff.2.weight": { "min": -0.6568311452865601, "max": 0.5346067547798157, "mean": -4.890329364570789e-05, "std": 0.052846092730760574, "sparsity": 0.0, "shape": [ 1024, 4096 ] }, "transformer.layers.20.4.ff.2.bias": { "min": -0.19282352924346924, "max": 0.5817168354988098, "mean": -0.0005141475703567266, "std": 0.04106360301375389, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.21.0.weight": { "min": -0.41765207052230835, "max": 0.3718544840812683, "mean": 6.159986696729902e-06, "std": 0.02162080444395542, "sparsity": 0.0, "shape": [ 1024, 2048 ] }, "transformer.layers.21.1.g": { "min": 0.21428614854812622, "max": 0.7470263838768005, "mean": 0.6495206356048584, "std": 0.05435969680547714, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.21.2.to_q.weight": { "min": -0.20919783413410187, "max": 0.19538012146949768, "mean": 4.023606743430719e-05, "std": 0.03946175053715706, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.21.2.to_q.bias": { "min": -0.32906630635261536, "max": 0.25917014479637146, "mean": -0.003227022010833025, "std": 0.05624230206012726, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.21.2.to_k.weight": { "min": -0.20558328926563263, "max": 0.2543526589870453, "mean": 5.4226169595494866e-05, "std": 0.038564346730709076, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.21.2.to_k.bias": { "min": -6.239154815673828, "max": 6.927591800689697, "mean": 0.04829341918230057, "std": 1.3845902681350708, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.21.2.to_v.weight": { "min": -0.20949970185756683, "max": 0.22989487648010254, "mean": -5.106569460622268e-06, "std": 0.0413125716149807, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.21.2.to_v.bias": { "min": -0.04377944767475128, "max": 0.035965293645858765, "mean": 6.696500349789858e-07, "std": 0.012799888849258423, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.21.2.to_out.0.weight": { "min": -0.39747685194015503, "max": 0.3446802794933319, "mean": -5.5516902648378164e-05, "std": 0.0423889197409153, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.21.2.to_out.0.bias": { "min": -0.05503125116229057, "max": 0.06271757930517197, "mean": 0.00036430457839742303, "std": 0.018672339618206024, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.21.3.g": { "min": 0.35033905506134033, "max": 1.0429264307022095, "mean": 0.7893730998039246, "std": 0.048677314072847366, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.21.4.ff.0.0.weight": { "min": -0.3334490656852722, "max": 0.38581615686416626, "mean": -0.00016950252756942064, "std": 0.0414799265563488, "sparsity": 0.0, "shape": [ 4096, 1024 ] }, "transformer.layers.21.4.ff.0.0.bias": { "min": -0.1571725308895111, "max": 0.059094030410051346, "mean": -0.031832072883844376, "std": 0.025125639513134956, "sparsity": 0.0, "shape": [ 4096 ] }, "transformer.layers.21.4.ff.2.weight": { "min": -0.6957246661186218, "max": 0.4681403636932373, "mean": -8.918362436816096e-05, "std": 0.051792457699775696, "sparsity": 0.0, "shape": [ 1024, 4096 ] }, "transformer.layers.21.4.ff.2.bias": { "min": -0.24794545769691467, "max": 0.32831111550331116, "mean": -0.000254548795055598, "std": 0.04142748937010765, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.22.0.weight": { "min": -0.286994069814682, "max": 0.35009774565696716, "mean": -2.1362816369219217e-06, "std": 0.0242360457777977, "sparsity": 0.0, "shape": [ 1024, 2048 ] }, "transformer.layers.22.1.g": { "min": 0.1966284215450287, "max": 0.7790648937225342, "mean": 0.6702556014060974, "std": 0.058683399111032486, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.22.2.to_q.weight": { "min": -0.22847537696361542, "max": 0.23085317015647888, "mean": -1.998914376599714e-05, "std": 0.04043750837445259, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.22.2.to_q.bias": { "min": -0.2196640521287918, "max": 0.2406841218471527, "mean": 0.0007778428844176233, "std": 0.05581061542034149, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.22.2.to_k.weight": { "min": -0.21546684205532074, "max": 0.22625623643398285, "mean": -7.170689787017182e-05, "std": 0.039373625069856644, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.22.2.to_k.bias": { "min": -8.899069786071777, "max": 9.061844825744629, "mean": -0.0012379959225654602, "std": 1.8475514650344849, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.22.2.to_v.weight": { "min": -0.2690274119377136, "max": 0.2585972249507904, "mean": 4.365673885331489e-05, "std": 0.038405876606702805, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.22.2.to_v.bias": { "min": -0.05762965977191925, "max": 0.057730112224817276, "mean": 0.00035032647429034114, "std": 0.014716975390911102, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.22.2.to_out.0.weight": { "min": -0.2643204629421234, "max": 0.28830888867378235, "mean": -6.177595059853047e-05, "std": 0.03907199949026108, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.22.2.to_out.0.bias": { "min": -0.04382891580462456, "max": 0.03727584704756737, "mean": -8.995864482130855e-05, "std": 0.013357071205973625, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.22.3.g": { "min": 0.3394246995449066, "max": 1.0903522968292236, "mean": 0.8637199997901917, "std": 0.06381762027740479, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.22.4.ff.0.0.weight": { "min": -0.4231264889240265, "max": 0.41881492733955383, "mean": 0.00031262467382475734, "std": 0.04350043460726738, "sparsity": 0.0, "shape": [ 4096, 1024 ] }, "transformer.layers.22.4.ff.0.0.bias": { "min": -0.21452167630195618, "max": 0.1706276834011078, "mean": -0.029481077566742897, "std": 0.03191966935992241, "sparsity": 0.0, "shape": [ 4096 ] }, "transformer.layers.22.4.ff.2.weight": { "min": -0.5986213088035583, "max": 0.5590333342552185, "mean": -0.00015086884377524257, "std": 0.05344516038894653, "sparsity": 0.0, "shape": [ 1024, 4096 ] }, "transformer.layers.22.4.ff.2.bias": { "min": -0.17835262417793274, "max": 0.3764508068561554, "mean": 0.0013586997520178556, "std": 0.03730103746056557, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.23.0.weight": { "min": -0.3942283093929291, "max": 0.3688967823982239, "mean": 3.6990095395594835e-05, "std": 0.028617417439818382, "sparsity": 0.0, "shape": [ 1024, 2048 ] }, "transformer.layers.23.1.g": { "min": 0.2902565002441406, "max": 0.8266182541847229, "mean": 0.7055412530899048, "std": 0.06787826120853424, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.23.2.to_q.weight": { "min": -0.9262580275535583, "max": 1.0264337062835693, "mean": -2.6147403332288377e-05, "std": 0.04762481153011322, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.23.2.to_q.bias": { "min": -0.8780329823493958, "max": 0.8147000074386597, "mean": -0.0003064283519051969, "std": 0.09549984335899353, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.23.2.to_k.weight": { "min": -0.2694474458694458, "max": 0.2405342310667038, "mean": -2.2794924007030204e-05, "std": 0.03895170986652374, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.23.2.to_k.bias": { "min": -23.725736618041992, "max": 22.834732055664062, "mean": -0.09184679388999939, "std": 4.068049430847168, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.23.2.to_v.weight": { "min": -0.22741694748401642, "max": 0.2447165697813034, "mean": -2.5723496946739033e-05, "std": 0.03863721713423729, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.23.2.to_v.bias": { "min": -0.06024840846657753, "max": 0.04582807794213295, "mean": -0.00014292271225713193, "std": 0.014692682772874832, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.23.2.to_out.0.weight": { "min": -0.337954580783844, "max": 0.3742024004459381, "mean": 7.330418156925589e-06, "std": 0.04081300273537636, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.23.2.to_out.0.bias": { "min": -0.04640491306781769, "max": 0.19541829824447632, "mean": 0.00027370243333280087, "std": 0.013559137471020222, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.23.3.g": { "min": 0.3744112551212311, "max": 1.1277745962142944, "mean": 0.8900341987609863, "std": 0.06396359950304031, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.23.4.ff.0.0.weight": { "min": -0.4476565718650818, "max": 0.5421170592308044, "mean": 2.477337693562731e-05, "std": 0.04556567594408989, "sparsity": 0.0, "shape": [ 4096, 1024 ] }, "transformer.layers.23.4.ff.0.0.bias": { "min": -0.2238994538784027, "max": 0.0882241502404213, "mean": -0.03201638162136078, "std": 0.03775238245725632, "sparsity": 0.0, "shape": [ 4096 ] }, "transformer.layers.23.4.ff.2.weight": { "min": -0.7243073582649231, "max": 0.6882233619689941, "mean": 3.4276417864020914e-05, "std": 0.05177783966064453, "sparsity": 0.0, "shape": [ 1024, 4096 ] }, "transformer.layers.23.4.ff.2.bias": { "min": -0.17440874874591827, "max": 0.2182954102754593, "mean": 4.099373472854495e-05, "std": 0.0317707397043705, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.24.0.weight": { "min": -0.33985471725463867, "max": 0.3734351098537445, "mean": 4.3027404899476096e-05, "std": 0.03413975238800049, "sparsity": 0.0, "shape": [ 1024, 2048 ] }, "transformer.layers.24.1.g": { "min": 0.31756407022476196, "max": 1.2844599485397339, "mean": 0.6014232039451599, "std": 0.08331646770238876, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.24.2.to_q.weight": { "min": -0.2830894887447357, "max": 0.260119765996933, "mean": -2.825315732479794e-06, "std": 0.03598077595233917, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.24.2.to_q.bias": { "min": -0.23531799018383026, "max": 0.20526045560836792, "mean": 0.00023797567700967193, "std": 0.05601158365607262, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.24.2.to_k.weight": { "min": -0.43513408303260803, "max": 0.324799120426178, "mean": 2.434128509776201e-05, "std": 0.03413143381476402, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.24.2.to_k.bias": { "min": -5.539924144744873, "max": 7.305825233459473, "mean": -0.007350243628025055, "std": 0.6986610889434814, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.24.2.to_v.weight": { "min": -0.3433971107006073, "max": 0.36268630623817444, "mean": 0.00010339625441702083, "std": 0.047828007489442825, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.24.2.to_v.bias": { "min": -0.07370211184024811, "max": 0.06033240258693695, "mean": 0.0009340607211925089, "std": 0.014942350797355175, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.24.2.to_out.0.weight": { "min": -0.2555631995201111, "max": 0.28619974851608276, "mean": 4.566820280160755e-06, "std": 0.04155479371547699, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.24.2.to_out.0.bias": { "min": -0.05527225881814957, "max": 0.0627666711807251, "mean": 0.00013802105968352407, "std": 0.0071632144972682, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.24.3.g": { "min": 0.49384805560112, "max": 1.2211062908172607, "mean": 1.0134272575378418, "std": 0.11744718253612518, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.24.4.ff.0.0.weight": { "min": -1.093487024307251, "max": 1.046884298324585, "mean": -4.944120883010328e-05, "std": 0.052408553659915924, "sparsity": 0.0, "shape": [ 4096, 1024 ] }, "transformer.layers.24.4.ff.0.0.bias": { "min": -0.22308824956417084, "max": 0.17253872752189636, "mean": -0.027238916605710983, "std": 0.036325786262750626, "sparsity": 0.0, "shape": [ 4096 ] }, "transformer.layers.24.4.ff.2.weight": { "min": -0.8834213018417358, "max": 0.921511173248291, "mean": -0.00014601324801333249, "std": 0.05328161269426346, "sparsity": 0.0, "shape": [ 1024, 4096 ] }, "transformer.layers.24.4.ff.2.bias": { "min": -0.17091798782348633, "max": 0.3795103430747986, "mean": 0.0033677970059216022, "std": 0.039878927171230316, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.25.0.weight": { "min": -0.7767993211746216, "max": 0.7229223251342773, "mean": 1.8964092305395752e-05, "std": 0.04616083949804306, "sparsity": 0.0, "shape": [ 1024, 2048 ] }, "transformer.layers.25.1.g": { "min": 0.3385705351829529, "max": 1.4257850646972656, "mean": 0.948320209980011, "std": 0.20674099028110504, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.25.2.to_q.weight": { "min": -1.7456356287002563, "max": 1.7042957544326782, "mean": 0.00022721664572600275, "std": 0.1586850881576538, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.25.2.to_q.bias": { "min": -1.1983858346939087, "max": 1.0988513231277466, "mean": -0.009531477466225624, "std": 0.20368283987045288, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.25.2.to_k.weight": { "min": -0.4208756983280182, "max": 0.4265652298927307, "mean": 6.4577761804685e-05, "std": 0.0480157844722271, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.25.2.to_k.bias": { "min": -19.72553825378418, "max": 19.520837783813477, "mean": -0.2481747567653656, "std": 4.772479057312012, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.25.2.to_v.weight": { "min": -0.32345694303512573, "max": 0.4378505349159241, "mean": -1.1984889169980306e-05, "std": 0.04616131633520126, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.25.2.to_v.bias": { "min": -0.03403974324464798, "max": 0.03704509884119034, "mean": 0.0006423466256819665, "std": 0.012919273227453232, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.25.2.to_out.0.weight": { "min": -0.7029122710227966, "max": 0.6650063395500183, "mean": 4.321677261032164e-05, "std": 0.05788154527544975, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.25.2.to_out.0.bias": { "min": -0.07217518985271454, "max": 0.06747341901063919, "mean": -0.00013201506226323545, "std": 0.012908914126455784, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.25.3.g": { "min": 0.38026899099349976, "max": 1.3915380239486694, "mean": 1.0665700435638428, "std": 0.2197078913450241, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.25.4.ff.0.0.weight": { "min": -0.6161525845527649, "max": 0.7168518304824829, "mean": 0.00011199730215594172, "std": 0.058020394295454025, "sparsity": 0.0, "shape": [ 4096, 1024 ] }, "transformer.layers.25.4.ff.0.0.bias": { "min": -0.21944588422775269, "max": 0.22491848468780518, "mean": 0.00621908949688077, "std": 0.049715615808963776, "sparsity": 0.0, "shape": [ 4096 ] }, "transformer.layers.25.4.ff.2.weight": { "min": -0.6295903325080872, "max": 0.8891246914863586, "mean": 1.184111533802934e-05, "std": 0.023527733981609344, "sparsity": 0.0, "shape": [ 1024, 4096 ] }, "transformer.layers.25.4.ff.2.bias": { "min": -0.5063257217407227, "max": 0.4734645485877991, "mean": -0.0030142185278236866, "std": 0.06923094391822815, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.norm_out.g": { "min": 0.537803590297699, "max": 1.1795684099197388, "mean": 0.7827014327049255, "std": 0.09878505766391754, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.proj_out.weight": { "min": -0.2665232717990875, "max": 0.21241135895252228, "mean": -0.00022294482914730906, "std": 0.05399605259299278, "sparsity": 0.0, "shape": [ 100, 1024 ] }, "transformer.proj_out.bias": { "min": -0.23782978951931, "max": 0.014834473840892315, "mean": -0.04395260661840439, "std": 0.034306950867176056, "sparsity": 0.0, "shape": [ 100 ] } } }