{ "layer_types": { "transformer": 391 }, "parameter_counts": { "transformer.time_embed.time_mlp.0.weight": 262144, "transformer.time_embed.time_mlp.0.bias": 1024, "transformer.time_embed.time_mlp.2.weight": 1048576, "transformer.time_embed.time_mlp.2.bias": 1024, "transformer.text_embed.text_embed.weight": 254600, "transformer.input_embed.proj.weight": 307200, "transformer.input_embed.proj.bias": 1024, "transformer.input_embed.conv_pos_embed.conv1d.0.weight": 2031616, "transformer.input_embed.conv_pos_embed.conv1d.0.bias": 1024, "transformer.input_embed.conv_pos_embed.conv1d.2.weight": 2031616, "transformer.input_embed.conv_pos_embed.conv1d.2.bias": 1024, "transformer.layers.0.1.g": 1024, "transformer.layers.0.2.to_q.weight": 1048576, "transformer.layers.0.2.to_q.bias": 1024, "transformer.layers.0.2.to_k.weight": 1048576, "transformer.layers.0.2.to_k.bias": 1024, "transformer.layers.0.2.to_v.weight": 1048576, "transformer.layers.0.2.to_v.bias": 1024, "transformer.layers.0.2.to_out.0.weight": 1048576, "transformer.layers.0.2.to_out.0.bias": 1024, "transformer.layers.0.3.g": 1024, "transformer.layers.0.4.ff.0.0.weight": 4194304, "transformer.layers.0.4.ff.0.0.bias": 4096, "transformer.layers.0.4.ff.2.weight": 4194304, "transformer.layers.0.4.ff.2.bias": 1024, "transformer.layers.1.1.g": 1024, "transformer.layers.1.2.to_q.weight": 1048576, "transformer.layers.1.2.to_q.bias": 1024, "transformer.layers.1.2.to_k.weight": 1048576, "transformer.layers.1.2.to_k.bias": 1024, "transformer.layers.1.2.to_v.weight": 1048576, "transformer.layers.1.2.to_v.bias": 1024, "transformer.layers.1.2.to_out.0.weight": 1048576, "transformer.layers.1.2.to_out.0.bias": 1024, "transformer.layers.1.3.g": 1024, "transformer.layers.1.4.ff.0.0.weight": 4194304, "transformer.layers.1.4.ff.0.0.bias": 4096, "transformer.layers.1.4.ff.2.weight": 4194304, "transformer.layers.1.4.ff.2.bias": 1024, "transformer.layers.2.1.g": 1024, "transformer.layers.2.2.to_q.weight": 1048576, "transformer.layers.2.2.to_q.bias": 1024, "transformer.layers.2.2.to_k.weight": 1048576, "transformer.layers.2.2.to_k.bias": 1024, "transformer.layers.2.2.to_v.weight": 1048576, "transformer.layers.2.2.to_v.bias": 1024, "transformer.layers.2.2.to_out.0.weight": 1048576, "transformer.layers.2.2.to_out.0.bias": 1024, "transformer.layers.2.3.g": 1024, "transformer.layers.2.4.ff.0.0.weight": 4194304, "transformer.layers.2.4.ff.0.0.bias": 4096, "transformer.layers.2.4.ff.2.weight": 4194304, "transformer.layers.2.4.ff.2.bias": 1024, "transformer.layers.3.1.g": 1024, "transformer.layers.3.2.to_q.weight": 1048576, "transformer.layers.3.2.to_q.bias": 1024, "transformer.layers.3.2.to_k.weight": 1048576, "transformer.layers.3.2.to_k.bias": 1024, "transformer.layers.3.2.to_v.weight": 1048576, "transformer.layers.3.2.to_v.bias": 1024, "transformer.layers.3.2.to_out.0.weight": 1048576, "transformer.layers.3.2.to_out.0.bias": 1024, "transformer.layers.3.3.g": 1024, "transformer.layers.3.4.ff.0.0.weight": 4194304, "transformer.layers.3.4.ff.0.0.bias": 4096, "transformer.layers.3.4.ff.2.weight": 4194304, "transformer.layers.3.4.ff.2.bias": 1024, "transformer.layers.4.1.g": 1024, "transformer.layers.4.2.to_q.weight": 1048576, "transformer.layers.4.2.to_q.bias": 1024, "transformer.layers.4.2.to_k.weight": 1048576, "transformer.layers.4.2.to_k.bias": 1024, "transformer.layers.4.2.to_v.weight": 1048576, "transformer.layers.4.2.to_v.bias": 1024, "transformer.layers.4.2.to_out.0.weight": 1048576, "transformer.layers.4.2.to_out.0.bias": 1024, "transformer.layers.4.3.g": 1024, "transformer.layers.4.4.ff.0.0.weight": 4194304, "transformer.layers.4.4.ff.0.0.bias": 4096, "transformer.layers.4.4.ff.2.weight": 4194304, "transformer.layers.4.4.ff.2.bias": 1024, "transformer.layers.5.1.g": 1024, "transformer.layers.5.2.to_q.weight": 1048576, "transformer.layers.5.2.to_q.bias": 1024, "transformer.layers.5.2.to_k.weight": 1048576, "transformer.layers.5.2.to_k.bias": 1024, "transformer.layers.5.2.to_v.weight": 1048576, "transformer.layers.5.2.to_v.bias": 1024, "transformer.layers.5.2.to_out.0.weight": 1048576, "transformer.layers.5.2.to_out.0.bias": 1024, "transformer.layers.5.3.g": 1024, "transformer.layers.5.4.ff.0.0.weight": 4194304, "transformer.layers.5.4.ff.0.0.bias": 4096, "transformer.layers.5.4.ff.2.weight": 4194304, "transformer.layers.5.4.ff.2.bias": 1024, "transformer.layers.6.1.g": 1024, "transformer.layers.6.2.to_q.weight": 1048576, "transformer.layers.6.2.to_q.bias": 1024, "transformer.layers.6.2.to_k.weight": 1048576, "transformer.layers.6.2.to_k.bias": 1024, "transformer.layers.6.2.to_v.weight": 1048576, "transformer.layers.6.2.to_v.bias": 1024, "transformer.layers.6.2.to_out.0.weight": 1048576, "transformer.layers.6.2.to_out.0.bias": 1024, "transformer.layers.6.3.g": 1024, "transformer.layers.6.4.ff.0.0.weight": 4194304, "transformer.layers.6.4.ff.0.0.bias": 4096, "transformer.layers.6.4.ff.2.weight": 4194304, "transformer.layers.6.4.ff.2.bias": 1024, "transformer.layers.7.1.g": 1024, "transformer.layers.7.2.to_q.weight": 1048576, "transformer.layers.7.2.to_q.bias": 1024, "transformer.layers.7.2.to_k.weight": 1048576, "transformer.layers.7.2.to_k.bias": 1024, "transformer.layers.7.2.to_v.weight": 1048576, "transformer.layers.7.2.to_v.bias": 1024, "transformer.layers.7.2.to_out.0.weight": 1048576, "transformer.layers.7.2.to_out.0.bias": 1024, "transformer.layers.7.3.g": 1024, "transformer.layers.7.4.ff.0.0.weight": 4194304, "transformer.layers.7.4.ff.0.0.bias": 4096, "transformer.layers.7.4.ff.2.weight": 4194304, "transformer.layers.7.4.ff.2.bias": 1024, "transformer.layers.8.1.g": 1024, "transformer.layers.8.2.to_q.weight": 1048576, "transformer.layers.8.2.to_q.bias": 1024, "transformer.layers.8.2.to_k.weight": 1048576, "transformer.layers.8.2.to_k.bias": 1024, "transformer.layers.8.2.to_v.weight": 1048576, "transformer.layers.8.2.to_v.bias": 1024, "transformer.layers.8.2.to_out.0.weight": 1048576, "transformer.layers.8.2.to_out.0.bias": 1024, "transformer.layers.8.3.g": 1024, "transformer.layers.8.4.ff.0.0.weight": 4194304, "transformer.layers.8.4.ff.0.0.bias": 4096, "transformer.layers.8.4.ff.2.weight": 4194304, "transformer.layers.8.4.ff.2.bias": 1024, "transformer.layers.9.1.g": 1024, "transformer.layers.9.2.to_q.weight": 1048576, "transformer.layers.9.2.to_q.bias": 1024, "transformer.layers.9.2.to_k.weight": 1048576, "transformer.layers.9.2.to_k.bias": 1024, "transformer.layers.9.2.to_v.weight": 1048576, "transformer.layers.9.2.to_v.bias": 1024, "transformer.layers.9.2.to_out.0.weight": 1048576, "transformer.layers.9.2.to_out.0.bias": 1024, "transformer.layers.9.3.g": 1024, "transformer.layers.9.4.ff.0.0.weight": 4194304, "transformer.layers.9.4.ff.0.0.bias": 4096, "transformer.layers.9.4.ff.2.weight": 4194304, "transformer.layers.9.4.ff.2.bias": 1024, "transformer.layers.10.1.g": 1024, "transformer.layers.10.2.to_q.weight": 1048576, "transformer.layers.10.2.to_q.bias": 1024, "transformer.layers.10.2.to_k.weight": 1048576, "transformer.layers.10.2.to_k.bias": 1024, "transformer.layers.10.2.to_v.weight": 1048576, "transformer.layers.10.2.to_v.bias": 1024, "transformer.layers.10.2.to_out.0.weight": 1048576, "transformer.layers.10.2.to_out.0.bias": 1024, "transformer.layers.10.3.g": 1024, "transformer.layers.10.4.ff.0.0.weight": 4194304, "transformer.layers.10.4.ff.0.0.bias": 4096, "transformer.layers.10.4.ff.2.weight": 4194304, "transformer.layers.10.4.ff.2.bias": 1024, "transformer.layers.11.1.g": 1024, "transformer.layers.11.2.to_q.weight": 1048576, "transformer.layers.11.2.to_q.bias": 1024, "transformer.layers.11.2.to_k.weight": 1048576, "transformer.layers.11.2.to_k.bias": 1024, "transformer.layers.11.2.to_v.weight": 1048576, "transformer.layers.11.2.to_v.bias": 1024, "transformer.layers.11.2.to_out.0.weight": 1048576, "transformer.layers.11.2.to_out.0.bias": 1024, "transformer.layers.11.3.g": 1024, "transformer.layers.11.4.ff.0.0.weight": 4194304, "transformer.layers.11.4.ff.0.0.bias": 4096, "transformer.layers.11.4.ff.2.weight": 4194304, "transformer.layers.11.4.ff.2.bias": 1024, "transformer.layers.12.1.g": 1024, "transformer.layers.12.2.to_q.weight": 1048576, "transformer.layers.12.2.to_q.bias": 1024, "transformer.layers.12.2.to_k.weight": 1048576, "transformer.layers.12.2.to_k.bias": 1024, "transformer.layers.12.2.to_v.weight": 1048576, "transformer.layers.12.2.to_v.bias": 1024, "transformer.layers.12.2.to_out.0.weight": 1048576, "transformer.layers.12.2.to_out.0.bias": 1024, "transformer.layers.12.3.g": 1024, "transformer.layers.12.4.ff.0.0.weight": 4194304, "transformer.layers.12.4.ff.0.0.bias": 4096, "transformer.layers.12.4.ff.2.weight": 4194304, "transformer.layers.12.4.ff.2.bias": 1024, "transformer.layers.13.0.weight": 2097152, "transformer.layers.13.1.g": 1024, "transformer.layers.13.2.to_q.weight": 1048576, "transformer.layers.13.2.to_q.bias": 1024, "transformer.layers.13.2.to_k.weight": 1048576, "transformer.layers.13.2.to_k.bias": 1024, "transformer.layers.13.2.to_v.weight": 1048576, "transformer.layers.13.2.to_v.bias": 1024, "transformer.layers.13.2.to_out.0.weight": 1048576, "transformer.layers.13.2.to_out.0.bias": 1024, "transformer.layers.13.3.g": 1024, "transformer.layers.13.4.ff.0.0.weight": 4194304, "transformer.layers.13.4.ff.0.0.bias": 4096, "transformer.layers.13.4.ff.2.weight": 4194304, "transformer.layers.13.4.ff.2.bias": 1024, "transformer.layers.14.0.weight": 2097152, "transformer.layers.14.1.g": 1024, "transformer.layers.14.2.to_q.weight": 1048576, "transformer.layers.14.2.to_q.bias": 1024, "transformer.layers.14.2.to_k.weight": 1048576, "transformer.layers.14.2.to_k.bias": 1024, "transformer.layers.14.2.to_v.weight": 1048576, "transformer.layers.14.2.to_v.bias": 1024, "transformer.layers.14.2.to_out.0.weight": 1048576, "transformer.layers.14.2.to_out.0.bias": 1024, "transformer.layers.14.3.g": 1024, "transformer.layers.14.4.ff.0.0.weight": 4194304, "transformer.layers.14.4.ff.0.0.bias": 4096, "transformer.layers.14.4.ff.2.weight": 4194304, "transformer.layers.14.4.ff.2.bias": 1024, "transformer.layers.15.0.weight": 2097152, "transformer.layers.15.1.g": 1024, "transformer.layers.15.2.to_q.weight": 1048576, "transformer.layers.15.2.to_q.bias": 1024, "transformer.layers.15.2.to_k.weight": 1048576, "transformer.layers.15.2.to_k.bias": 1024, "transformer.layers.15.2.to_v.weight": 1048576, "transformer.layers.15.2.to_v.bias": 1024, "transformer.layers.15.2.to_out.0.weight": 1048576, "transformer.layers.15.2.to_out.0.bias": 1024, "transformer.layers.15.3.g": 1024, "transformer.layers.15.4.ff.0.0.weight": 4194304, "transformer.layers.15.4.ff.0.0.bias": 4096, "transformer.layers.15.4.ff.2.weight": 4194304, "transformer.layers.15.4.ff.2.bias": 1024, "transformer.layers.16.0.weight": 2097152, "transformer.layers.16.1.g": 1024, "transformer.layers.16.2.to_q.weight": 1048576, "transformer.layers.16.2.to_q.bias": 1024, "transformer.layers.16.2.to_k.weight": 1048576, "transformer.layers.16.2.to_k.bias": 1024, "transformer.layers.16.2.to_v.weight": 1048576, "transformer.layers.16.2.to_v.bias": 1024, "transformer.layers.16.2.to_out.0.weight": 1048576, "transformer.layers.16.2.to_out.0.bias": 1024, "transformer.layers.16.3.g": 1024, "transformer.layers.16.4.ff.0.0.weight": 4194304, "transformer.layers.16.4.ff.0.0.bias": 4096, "transformer.layers.16.4.ff.2.weight": 4194304, "transformer.layers.16.4.ff.2.bias": 1024, "transformer.layers.17.0.weight": 2097152, "transformer.layers.17.1.g": 1024, "transformer.layers.17.2.to_q.weight": 1048576, "transformer.layers.17.2.to_q.bias": 1024, "transformer.layers.17.2.to_k.weight": 1048576, "transformer.layers.17.2.to_k.bias": 1024, "transformer.layers.17.2.to_v.weight": 1048576, "transformer.layers.17.2.to_v.bias": 1024, "transformer.layers.17.2.to_out.0.weight": 1048576, "transformer.layers.17.2.to_out.0.bias": 1024, "transformer.layers.17.3.g": 1024, "transformer.layers.17.4.ff.0.0.weight": 4194304, "transformer.layers.17.4.ff.0.0.bias": 4096, "transformer.layers.17.4.ff.2.weight": 4194304, "transformer.layers.17.4.ff.2.bias": 1024, "transformer.layers.18.0.weight": 2097152, "transformer.layers.18.1.g": 1024, "transformer.layers.18.2.to_q.weight": 1048576, "transformer.layers.18.2.to_q.bias": 1024, "transformer.layers.18.2.to_k.weight": 1048576, "transformer.layers.18.2.to_k.bias": 1024, "transformer.layers.18.2.to_v.weight": 1048576, "transformer.layers.18.2.to_v.bias": 1024, "transformer.layers.18.2.to_out.0.weight": 1048576, "transformer.layers.18.2.to_out.0.bias": 1024, "transformer.layers.18.3.g": 1024, "transformer.layers.18.4.ff.0.0.weight": 4194304, "transformer.layers.18.4.ff.0.0.bias": 4096, "transformer.layers.18.4.ff.2.weight": 4194304, "transformer.layers.18.4.ff.2.bias": 1024, "transformer.layers.19.0.weight": 2097152, "transformer.layers.19.1.g": 1024, "transformer.layers.19.2.to_q.weight": 1048576, "transformer.layers.19.2.to_q.bias": 1024, "transformer.layers.19.2.to_k.weight": 1048576, "transformer.layers.19.2.to_k.bias": 1024, "transformer.layers.19.2.to_v.weight": 1048576, "transformer.layers.19.2.to_v.bias": 1024, "transformer.layers.19.2.to_out.0.weight": 1048576, "transformer.layers.19.2.to_out.0.bias": 1024, "transformer.layers.19.3.g": 1024, "transformer.layers.19.4.ff.0.0.weight": 4194304, "transformer.layers.19.4.ff.0.0.bias": 4096, "transformer.layers.19.4.ff.2.weight": 4194304, "transformer.layers.19.4.ff.2.bias": 1024, "transformer.layers.20.0.weight": 2097152, "transformer.layers.20.1.g": 1024, "transformer.layers.20.2.to_q.weight": 1048576, "transformer.layers.20.2.to_q.bias": 1024, "transformer.layers.20.2.to_k.weight": 1048576, "transformer.layers.20.2.to_k.bias": 1024, "transformer.layers.20.2.to_v.weight": 1048576, "transformer.layers.20.2.to_v.bias": 1024, "transformer.layers.20.2.to_out.0.weight": 1048576, "transformer.layers.20.2.to_out.0.bias": 1024, "transformer.layers.20.3.g": 1024, "transformer.layers.20.4.ff.0.0.weight": 4194304, "transformer.layers.20.4.ff.0.0.bias": 4096, "transformer.layers.20.4.ff.2.weight": 4194304, "transformer.layers.20.4.ff.2.bias": 1024, "transformer.layers.21.0.weight": 2097152, "transformer.layers.21.1.g": 1024, "transformer.layers.21.2.to_q.weight": 1048576, "transformer.layers.21.2.to_q.bias": 1024, "transformer.layers.21.2.to_k.weight": 1048576, "transformer.layers.21.2.to_k.bias": 1024, "transformer.layers.21.2.to_v.weight": 1048576, "transformer.layers.21.2.to_v.bias": 1024, "transformer.layers.21.2.to_out.0.weight": 1048576, "transformer.layers.21.2.to_out.0.bias": 1024, "transformer.layers.21.3.g": 1024, "transformer.layers.21.4.ff.0.0.weight": 4194304, "transformer.layers.21.4.ff.0.0.bias": 4096, "transformer.layers.21.4.ff.2.weight": 4194304, "transformer.layers.21.4.ff.2.bias": 1024, "transformer.layers.22.0.weight": 2097152, "transformer.layers.22.1.g": 1024, "transformer.layers.22.2.to_q.weight": 1048576, "transformer.layers.22.2.to_q.bias": 1024, "transformer.layers.22.2.to_k.weight": 1048576, "transformer.layers.22.2.to_k.bias": 1024, "transformer.layers.22.2.to_v.weight": 1048576, "transformer.layers.22.2.to_v.bias": 1024, "transformer.layers.22.2.to_out.0.weight": 1048576, "transformer.layers.22.2.to_out.0.bias": 1024, "transformer.layers.22.3.g": 1024, "transformer.layers.22.4.ff.0.0.weight": 4194304, "transformer.layers.22.4.ff.0.0.bias": 4096, "transformer.layers.22.4.ff.2.weight": 4194304, "transformer.layers.22.4.ff.2.bias": 1024, "transformer.layers.23.0.weight": 2097152, "transformer.layers.23.1.g": 1024, "transformer.layers.23.2.to_q.weight": 1048576, "transformer.layers.23.2.to_q.bias": 1024, "transformer.layers.23.2.to_k.weight": 1048576, "transformer.layers.23.2.to_k.bias": 1024, "transformer.layers.23.2.to_v.weight": 1048576, "transformer.layers.23.2.to_v.bias": 1024, "transformer.layers.23.2.to_out.0.weight": 1048576, "transformer.layers.23.2.to_out.0.bias": 1024, "transformer.layers.23.3.g": 1024, "transformer.layers.23.4.ff.0.0.weight": 4194304, "transformer.layers.23.4.ff.0.0.bias": 4096, "transformer.layers.23.4.ff.2.weight": 4194304, "transformer.layers.23.4.ff.2.bias": 1024, "transformer.layers.24.0.weight": 2097152, "transformer.layers.24.1.g": 1024, "transformer.layers.24.2.to_q.weight": 1048576, "transformer.layers.24.2.to_q.bias": 1024, "transformer.layers.24.2.to_k.weight": 1048576, "transformer.layers.24.2.to_k.bias": 1024, "transformer.layers.24.2.to_v.weight": 1048576, "transformer.layers.24.2.to_v.bias": 1024, "transformer.layers.24.2.to_out.0.weight": 1048576, "transformer.layers.24.2.to_out.0.bias": 1024, "transformer.layers.24.3.g": 1024, "transformer.layers.24.4.ff.0.0.weight": 4194304, "transformer.layers.24.4.ff.0.0.bias": 4096, "transformer.layers.24.4.ff.2.weight": 4194304, "transformer.layers.24.4.ff.2.bias": 1024, "transformer.layers.25.0.weight": 2097152, "transformer.layers.25.1.g": 1024, "transformer.layers.25.2.to_q.weight": 1048576, "transformer.layers.25.2.to_q.bias": 1024, "transformer.layers.25.2.to_k.weight": 1048576, "transformer.layers.25.2.to_k.bias": 1024, "transformer.layers.25.2.to_v.weight": 1048576, "transformer.layers.25.2.to_v.bias": 1024, "transformer.layers.25.2.to_out.0.weight": 1048576, "transformer.layers.25.2.to_out.0.bias": 1024, "transformer.layers.25.3.g": 1024, "transformer.layers.25.4.ff.0.0.weight": 4194304, "transformer.layers.25.4.ff.0.0.bias": 4096, "transformer.layers.25.4.ff.2.weight": 4194304, "transformer.layers.25.4.ff.2.bias": 1024, "transformer.norm_out.g": 1024, "transformer.proj_out.weight": 102400, "transformer.proj_out.bias": 100 }, "important_layers": [ "transformer.time_embed.time_mlp.0.weight", "transformer.time_embed.time_mlp.2.weight", "transformer.text_embed.text_embed.weight", "transformer.input_embed.proj.weight", "transformer.input_embed.conv_pos_embed.conv1d.0.weight", "transformer.input_embed.conv_pos_embed.conv1d.2.weight", "transformer.layers.0.2.to_q.weight", "transformer.layers.0.2.to_k.weight", "transformer.layers.0.2.to_v.weight", "transformer.layers.0.2.to_out.0.weight", "transformer.layers.0.4.ff.0.0.weight", "transformer.layers.0.4.ff.2.weight", "transformer.layers.1.2.to_q.weight", "transformer.layers.1.2.to_k.weight", "transformer.layers.1.2.to_v.weight", "transformer.layers.1.2.to_out.0.weight", "transformer.layers.1.4.ff.0.0.weight", "transformer.layers.1.4.ff.2.weight", "transformer.layers.2.2.to_q.weight", "transformer.layers.2.2.to_k.weight", "transformer.layers.2.2.to_v.weight", "transformer.layers.2.2.to_out.0.weight", "transformer.layers.2.4.ff.0.0.weight", "transformer.layers.2.4.ff.2.weight", "transformer.layers.3.2.to_q.weight", "transformer.layers.3.2.to_k.weight", "transformer.layers.3.2.to_v.weight", "transformer.layers.3.2.to_out.0.weight", "transformer.layers.3.4.ff.0.0.weight", "transformer.layers.3.4.ff.2.weight", "transformer.layers.4.2.to_q.weight", "transformer.layers.4.2.to_k.weight", "transformer.layers.4.2.to_v.weight", "transformer.layers.4.2.to_out.0.weight", "transformer.layers.4.4.ff.0.0.weight", "transformer.layers.4.4.ff.2.weight", "transformer.layers.5.2.to_q.weight", "transformer.layers.5.2.to_k.weight", "transformer.layers.5.2.to_v.weight", "transformer.layers.5.2.to_out.0.weight", "transformer.layers.5.4.ff.0.0.weight", "transformer.layers.5.4.ff.2.weight", "transformer.layers.6.2.to_q.weight", "transformer.layers.6.2.to_k.weight", "transformer.layers.6.2.to_v.weight", "transformer.layers.6.2.to_out.0.weight", "transformer.layers.6.4.ff.0.0.weight", "transformer.layers.6.4.ff.2.weight", "transformer.layers.7.2.to_q.weight", "transformer.layers.7.2.to_k.weight", "transformer.layers.7.2.to_v.weight", "transformer.layers.7.2.to_out.0.weight", "transformer.layers.7.4.ff.0.0.weight", "transformer.layers.7.4.ff.2.weight", "transformer.layers.8.4.ff.0.0.weight", "transformer.layers.8.4.ff.2.weight", "transformer.layers.9.4.ff.0.0.weight", "transformer.layers.9.4.ff.2.weight", "transformer.layers.10.4.ff.0.0.weight", "transformer.layers.10.4.ff.2.weight", "transformer.layers.11.4.ff.0.0.weight", "transformer.layers.11.4.ff.2.weight", "transformer.layers.12.4.ff.0.0.weight", "transformer.layers.12.4.ff.2.weight", "transformer.layers.13.0.weight", "transformer.layers.13.4.ff.0.0.weight", "transformer.layers.13.4.ff.2.weight", "transformer.layers.14.0.weight", "transformer.layers.14.4.ff.0.0.weight", "transformer.layers.14.4.ff.2.weight", "transformer.layers.15.0.weight", "transformer.layers.15.4.ff.0.0.weight", "transformer.layers.15.4.ff.2.weight", "transformer.layers.16.4.ff.0.0.weight", "transformer.layers.16.4.ff.2.weight", "transformer.layers.17.4.ff.0.0.weight", "transformer.layers.17.4.ff.2.weight", "transformer.layers.18.4.ff.0.0.weight", "transformer.layers.18.4.ff.2.weight", "transformer.layers.19.4.ff.0.0.weight", "transformer.layers.19.4.ff.2.weight", "transformer.layers.20.4.ff.0.0.weight", "transformer.layers.20.4.ff.2.weight", "transformer.layers.21.4.ff.0.0.weight", "transformer.layers.21.4.ff.2.weight", "transformer.layers.22.4.ff.0.0.weight", "transformer.layers.22.4.ff.2.weight", "transformer.layers.23.4.ff.0.0.weight", "transformer.layers.23.4.ff.2.weight", "transformer.layers.24.4.ff.0.0.weight", "transformer.layers.24.4.ff.2.weight", "transformer.layers.25.4.ff.0.0.weight", "transformer.layers.25.4.ff.2.weight" ], "bottleneck_layers": [], "recommendations": { "focus_layers": [ "transformer.time_embed.time_mlp.0.weight", "transformer.time_embed.time_mlp.2.weight", "transformer.text_embed.text_embed.weight", "transformer.input_embed.proj.weight", "transformer.input_embed.conv_pos_embed.conv1d.0.weight", "transformer.input_embed.conv_pos_embed.conv1d.2.weight", "transformer.layers.0.2.to_q.weight", "transformer.layers.0.2.to_k.weight", "transformer.layers.0.2.to_v.weight", "transformer.layers.0.2.to_out.0.weight", "transformer.layers.0.4.ff.0.0.weight", "transformer.layers.0.4.ff.2.weight", "transformer.layers.1.2.to_q.weight", "transformer.layers.1.2.to_k.weight", "transformer.layers.1.2.to_v.weight", "transformer.layers.1.2.to_out.0.weight", "transformer.layers.1.4.ff.0.0.weight", "transformer.layers.1.4.ff.2.weight", "transformer.layers.2.2.to_q.weight", "transformer.layers.2.2.to_k.weight", "transformer.layers.2.2.to_v.weight", "transformer.layers.2.2.to_out.0.weight", "transformer.layers.2.4.ff.0.0.weight", "transformer.layers.2.4.ff.2.weight", "transformer.layers.3.2.to_q.weight", "transformer.layers.3.2.to_k.weight", "transformer.layers.3.2.to_v.weight", "transformer.layers.3.2.to_out.0.weight", "transformer.layers.3.4.ff.0.0.weight", "transformer.layers.3.4.ff.2.weight", "transformer.layers.4.2.to_q.weight", "transformer.layers.4.2.to_k.weight", "transformer.layers.4.2.to_v.weight", "transformer.layers.4.2.to_out.0.weight", "transformer.layers.4.4.ff.0.0.weight", "transformer.layers.4.4.ff.2.weight", "transformer.layers.5.2.to_q.weight", "transformer.layers.5.2.to_k.weight", "transformer.layers.5.2.to_v.weight", "transformer.layers.5.2.to_out.0.weight", "transformer.layers.5.4.ff.0.0.weight", "transformer.layers.5.4.ff.2.weight", "transformer.layers.6.2.to_q.weight", "transformer.layers.6.2.to_k.weight", "transformer.layers.6.2.to_v.weight", "transformer.layers.6.2.to_out.0.weight", "transformer.layers.6.4.ff.0.0.weight", "transformer.layers.6.4.ff.2.weight", "transformer.layers.7.2.to_q.weight", "transformer.layers.7.2.to_k.weight", "transformer.layers.7.2.to_v.weight", "transformer.layers.7.2.to_out.0.weight", "transformer.layers.7.4.ff.0.0.weight", "transformer.layers.7.4.ff.2.weight", "transformer.layers.8.4.ff.0.0.weight", "transformer.layers.8.4.ff.2.weight", "transformer.layers.9.4.ff.0.0.weight", "transformer.layers.9.4.ff.2.weight", "transformer.layers.10.4.ff.0.0.weight", "transformer.layers.10.4.ff.2.weight", "transformer.layers.11.4.ff.0.0.weight", "transformer.layers.11.4.ff.2.weight", "transformer.layers.12.4.ff.0.0.weight", "transformer.layers.12.4.ff.2.weight", "transformer.layers.13.0.weight", "transformer.layers.13.4.ff.0.0.weight", "transformer.layers.13.4.ff.2.weight", "transformer.layers.14.0.weight", "transformer.layers.14.4.ff.0.0.weight", "transformer.layers.14.4.ff.2.weight", "transformer.layers.15.0.weight", "transformer.layers.15.4.ff.0.0.weight", "transformer.layers.15.4.ff.2.weight", "transformer.layers.16.4.ff.0.0.weight", "transformer.layers.16.4.ff.2.weight", "transformer.layers.17.4.ff.0.0.weight", "transformer.layers.17.4.ff.2.weight", "transformer.layers.18.4.ff.0.0.weight", "transformer.layers.18.4.ff.2.weight", "transformer.layers.19.4.ff.0.0.weight", "transformer.layers.19.4.ff.2.weight", "transformer.layers.20.4.ff.0.0.weight", "transformer.layers.20.4.ff.2.weight", "transformer.layers.21.4.ff.0.0.weight", "transformer.layers.21.4.ff.2.weight", "transformer.layers.22.4.ff.0.0.weight", "transformer.layers.22.4.ff.2.weight", "transformer.layers.23.4.ff.0.0.weight", "transformer.layers.23.4.ff.2.weight", "transformer.layers.24.4.ff.0.0.weight", "transformer.layers.24.4.ff.2.weight", "transformer.layers.25.4.ff.0.0.weight", "transformer.layers.25.4.ff.2.weight" ] }, "total_parameters": 391, "total_elements": 360755948, "param_ranges": { "transformer.time_embed.time_mlp.0.weight": { "min": -0.4310249388217926, "max": 0.29892200231552124, "mean": -0.0025504794903099537, "std": 0.0425548329949379, "sparsity": 0.0, "shape": [ 1024, 256 ] }, "transformer.time_embed.time_mlp.0.bias": { "min": -0.06312082707881927, "max": 0.10854886472225189, "mean": 0.000634247378911823, "std": 0.03414047509431839, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.time_embed.time_mlp.2.weight": { "min": -0.4126858711242676, "max": 0.8365619778633118, "mean": -0.00020620696886908263, "std": 0.02410798706114292, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.time_embed.time_mlp.2.bias": { "min": -0.1163593977689743, "max": 0.32443463802337646, "mean": -0.0009363778517581522, "std": 0.019653797149658203, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.text_embed.text_embed.weight": { "min": -2.8154137134552, "max": 2.8935482501983643, "mean": -0.0003568639513105154, "std": 0.6153793334960938, "sparsity": 0.0, "shape": [ 2546, 100 ] }, "transformer.input_embed.proj.weight": { "min": -0.2813769578933716, "max": 0.38245514035224915, "mean": 0.00042411635513417423, "std": 0.04274803400039673, "sparsity": 0.0, "shape": [ 1024, 300 ] }, "transformer.input_embed.proj.bias": { "min": -0.22421328723430634, "max": 0.21138469874858856, "mean": -0.004506870172917843, "std": 0.04105628281831741, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.input_embed.conv_pos_embed.conv1d.0.weight": { "min": -0.4279993176460266, "max": 0.47548574209213257, "mean": 4.261187768861419e-06, "std": 0.02450713701546192, "sparsity": 0.0, "shape": [ 1024, 64, 31 ] }, "transformer.input_embed.conv_pos_embed.conv1d.0.bias": { "min": -0.327997088432312, "max": 0.15884317457675934, "mean": -0.04679153859615326, "std": 0.05176762491464615, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.input_embed.conv_pos_embed.conv1d.2.weight": { "min": -0.4111199676990509, "max": 0.35511136054992676, "mean": -0.00012967045768164098, "std": 0.02359858900308609, "sparsity": 0.0, "shape": [ 1024, 64, 31 ] }, "transformer.input_embed.conv_pos_embed.conv1d.2.bias": { "min": -0.23166728019714355, "max": 0.26478779315948486, "mean": -0.029217107221484184, "std": 0.0495423898100853, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.0.1.g": { "min": 0.2546941041946411, "max": 0.8268164992332458, "mean": 0.5258853435516357, "std": 0.08176200091838837, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.0.2.to_q.weight": { "min": -0.29768767952919006, "max": 0.26705101132392883, "mean": -0.00042415110510773957, "std": 0.03210066258907318, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.0.2.to_q.bias": { "min": -0.09323342144489288, "max": 0.12589719891548157, "mean": 0.0006516888970509171, "std": 0.02578314207494259, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.0.2.to_k.weight": { "min": -0.2915492653846741, "max": 0.2830723226070404, "mean": -7.510973955504596e-05, "std": 0.03093201108276844, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.0.2.to_k.bias": { "min": -5.933852195739746, "max": 5.848132610321045, "mean": -0.009441309608519077, "std": 1.2997525930404663, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.0.2.to_v.weight": { "min": -0.4259975850582123, "max": 0.34512922167778015, "mean": 9.808027243707329e-05, "std": 0.029951922595500946, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.0.2.to_v.bias": { "min": -0.028870832175016403, "max": 0.027608035132288933, "mean": -0.0003159761254210025, "std": 0.012566526420414448, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.0.2.to_out.0.weight": { "min": -0.4554309844970703, "max": 0.44925424456596375, "mean": 2.2834456103737466e-05, "std": 0.023853331804275513, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.0.2.to_out.0.bias": { "min": -0.08927308022975922, "max": 0.09165928512811661, "mean": 0.002274596830829978, "std": 0.019546369090676308, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.0.3.g": { "min": 0.26676347851753235, "max": 1.06475031375885, "mean": 0.5317091345787048, "std": 0.1056147962808609, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.0.4.ff.0.0.weight": { "min": -0.5758013129234314, "max": 0.60973060131073, "mean": -0.00043392262887209654, "std": 0.03859521821141243, "sparsity": 0.0, "shape": [ 4096, 1024 ] }, "transformer.layers.0.4.ff.0.0.bias": { "min": -0.18311595916748047, "max": 0.045692577958106995, "mean": -0.02953081764280796, "std": 0.04277201369404793, "sparsity": 0.0, "shape": [ 4096 ] }, "transformer.layers.0.4.ff.2.weight": { "min": -1.169153094291687, "max": 1.6363517045974731, "mean": 0.00031960621709004045, "std": 0.027692886069417, "sparsity": 0.0, "shape": [ 1024, 4096 ] }, "transformer.layers.0.4.ff.2.bias": { "min": -0.16331635415554047, "max": 0.20692557096481323, "mean": -0.02113202027976513, "std": 0.0279996357858181, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.1.1.g": { "min": 0.22424264252185822, "max": 0.8506074547767639, "mean": 0.487909197807312, "std": 0.0759621262550354, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.1.2.to_q.weight": { "min": -0.25719332695007324, "max": 0.3069766163825989, "mean": -8.219409210141748e-06, "std": 0.033469025045633316, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.1.2.to_q.bias": { "min": -0.0958663746714592, "max": 0.1111140251159668, "mean": 6.868487980682403e-05, "std": 0.02699616365134716, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.1.2.to_k.weight": { "min": -0.2987782061100006, "max": 0.2982846796512604, "mean": 5.100301495986059e-05, "std": 0.03253886476159096, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.1.2.to_k.bias": { "min": -5.194380760192871, "max": 5.11414098739624, "mean": -0.01477175671607256, "std": 1.1622190475463867, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.1.2.to_v.weight": { "min": -0.3454170525074005, "max": 0.3440503478050232, "mean": 7.885548257036135e-05, "std": 0.03005816601216793, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.1.2.to_v.bias": { "min": -0.036366600543260574, "max": 0.033365145325660706, "mean": -0.00014353547885548323, "std": 0.013023492880165577, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.1.2.to_out.0.weight": { "min": -0.3166007697582245, "max": 0.37669771909713745, "mean": -2.1011579519836232e-05, "std": 0.024054987356066704, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.1.2.to_out.0.bias": { "min": -0.10603390634059906, "max": 0.12274863570928574, "mean": -0.0019654321949929, "std": 0.028894905000925064, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.1.3.g": { "min": 0.311918169260025, "max": 1.1306103467941284, "mean": 0.666860818862915, "std": 0.0989983081817627, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.1.4.ff.0.0.weight": { "min": -0.8729648590087891, "max": 0.6280122995376587, "mean": 0.0016747020417824388, "std": 0.047436561435461044, "sparsity": 0.0, "shape": [ 4096, 1024 ] }, "transformer.layers.1.4.ff.0.0.bias": { "min": -0.27260690927505493, "max": 0.03427213430404663, "mean": -0.04665624350309372, "std": 0.04072800651192665, "sparsity": 0.0, "shape": [ 4096 ] }, "transformer.layers.1.4.ff.2.weight": { "min": -0.9236066937446594, "max": 0.9658545255661011, "mean": 0.0010218569077551365, "std": 0.04070160537958145, "sparsity": 0.0, "shape": [ 1024, 4096 ] }, "transformer.layers.1.4.ff.2.bias": { "min": -0.14540822803974152, "max": 0.07539817690849304, "mean": -0.009104669094085693, "std": 0.025749636813998222, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.2.1.g": { "min": 0.23975443840026855, "max": 0.7185607552528381, "mean": 0.44753360748291016, "std": 0.06007208302617073, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.2.2.to_q.weight": { "min": -0.2746535837650299, "max": 0.2996414601802826, "mean": 8.662165782880038e-06, "std": 0.03547052666544914, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.2.2.to_q.bias": { "min": -0.11975187063217163, "max": 0.11919566243886948, "mean": 0.0007501145591959357, "std": 0.02767573855817318, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.2.2.to_k.weight": { "min": -0.2831306457519531, "max": 0.2817768156528473, "mean": -7.67814417486079e-05, "std": 0.035099856555461884, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.2.2.to_k.bias": { "min": -2.5266785621643066, "max": 2.5387556552886963, "mean": 0.026949256658554077, "std": 0.5885584354400635, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.2.2.to_v.weight": { "min": -0.22260574996471405, "max": 0.2732996642589569, "mean": 2.9508364605135284e-06, "std": 0.030731212347745895, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.2.2.to_v.bias": { "min": -0.0335291288793087, "max": 0.031390510499477386, "mean": 0.00011758864275179803, "std": 0.012400473468005657, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.2.2.to_out.0.weight": { "min": -0.23621369898319244, "max": 0.23289528489112854, "mean": 5.6726221373537555e-05, "std": 0.025696825236082077, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.2.2.to_out.0.bias": { "min": -0.13667543232440948, "max": 0.12879958748817444, "mean": -0.005504202097654343, "std": 0.040019236505031586, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.2.3.g": { "min": 0.35455986857414246, "max": 1.1826062202453613, "mean": 0.7107979655265808, "std": 0.10437346249818802, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.2.4.ff.0.0.weight": { "min": -0.6191003918647766, "max": 0.5564218759536743, "mean": 0.0011606740299612284, "std": 0.04611353576183319, "sparsity": 0.0, "shape": [ 4096, 1024 ] }, "transformer.layers.2.4.ff.0.0.bias": { "min": -0.19018200039863586, "max": 0.02485579438507557, "mean": -0.03489173576235771, "std": 0.028727849945425987, "sparsity": 0.0, "shape": [ 4096 ] }, "transformer.layers.2.4.ff.2.weight": { "min": -1.1358468532562256, "max": 0.9746898412704468, "mean": 0.00035939598456025124, "std": 0.04234171286225319, "sparsity": 0.0, "shape": [ 1024, 4096 ] }, "transformer.layers.2.4.ff.2.bias": { "min": -0.6019405722618103, "max": 0.06334464251995087, "mean": -0.00488577876240015, "std": 0.028712771832942963, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.3.1.g": { "min": 0.3755652904510498, "max": 0.9507709741592407, "mean": 0.5931843519210815, "std": 0.0686625987291336, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.3.2.to_q.weight": { "min": -0.3929532766342163, "max": 0.37091946601867676, "mean": 7.025484228506684e-05, "std": 0.03718522936105728, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.3.2.to_q.bias": { "min": -0.11978376656770706, "max": 0.13744011521339417, "mean": 0.0009335688664577901, "std": 0.029282478615641594, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.3.2.to_k.weight": { "min": -0.6229383945465088, "max": 0.5121926069259644, "mean": 1.5349294699262828e-05, "std": 0.03643808513879776, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.3.2.to_k.bias": { "min": -8.242501258850098, "max": 8.848700523376465, "mean": -0.10966195166110992, "std": 1.7074756622314453, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.3.2.to_v.weight": { "min": -0.2780378460884094, "max": 0.24072492122650146, "mean": 5.223074913374148e-05, "std": 0.03261224925518036, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.3.2.to_v.bias": { "min": -0.05211928114295006, "max": 0.03976155444979668, "mean": 9.01424209587276e-05, "std": 0.012970111332833767, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.3.2.to_out.0.weight": { "min": -0.23169712722301483, "max": 0.23602090775966644, "mean": -2.2195828933035955e-05, "std": 0.029388954862952232, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.3.2.to_out.0.bias": { "min": -0.20550638437271118, "max": 0.10590175539255142, "mean": -0.004026752896606922, "std": 0.03266817331314087, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.3.3.g": { "min": 0.3396901488304138, "max": 1.022835612297058, "mean": 0.7008680701255798, "std": 0.09710492938756943, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.3.4.ff.0.0.weight": { "min": -0.5683938264846802, "max": 0.8381193280220032, "mean": 0.00041519341175444424, "std": 0.04229409247636795, "sparsity": 0.0, "shape": [ 4096, 1024 ] }, "transformer.layers.3.4.ff.0.0.bias": { "min": -0.21325451135635376, "max": 0.03037591464817524, "mean": -0.03223013877868652, "std": 0.026610074564814568, "sparsity": 0.0, "shape": [ 4096 ] }, "transformer.layers.3.4.ff.2.weight": { "min": -0.7600710391998291, "max": 0.7236490845680237, "mean": -1.6499760022270493e-05, "std": 0.03683502599596977, "sparsity": 0.0, "shape": [ 1024, 4096 ] }, "transformer.layers.3.4.ff.2.bias": { "min": -0.26496192812919617, "max": 0.10684733092784882, "mean": -0.0030161943286657333, "std": 0.028908496722579002, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.4.1.g": { "min": 0.28418251872062683, "max": 0.7011516094207764, "mean": 0.499736487865448, "std": 0.047200758010149, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.4.2.to_q.weight": { "min": -0.28040796518325806, "max": 0.23536527156829834, "mean": -0.00011076986265834421, "std": 0.03875643387436867, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.4.2.to_q.bias": { "min": -0.15493866801261902, "max": 0.12730616331100464, "mean": -0.002237653825432062, "std": 0.03343982622027397, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.4.2.to_k.weight": { "min": -0.4170800745487213, "max": 0.6621686220169067, "mean": -1.8650103811523877e-05, "std": 0.039095137268304825, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.4.2.to_k.bias": { "min": -4.2626214027404785, "max": 4.750005722045898, "mean": -0.020378686487674713, "std": 1.0105632543563843, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.4.2.to_v.weight": { "min": -0.24659502506256104, "max": 0.2085939198732376, "mean": 4.402307604323141e-05, "std": 0.033962100744247437, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.4.2.to_v.bias": { "min": -0.03477818891406059, "max": 0.045115940272808075, "mean": -1.805905776564032e-05, "std": 0.012638943269848824, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.4.2.to_out.0.weight": { "min": -0.20247574150562286, "max": 0.20785965025424957, "mean": -2.8977701731491834e-05, "std": 0.031019993126392365, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.4.2.to_out.0.bias": { "min": -0.2010650485754013, "max": 0.11400442570447922, "mean": -0.002901929896324873, "std": 0.03455876186490059, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.4.3.g": { "min": 0.3669453561306, "max": 1.068376898765564, "mean": 0.6706770658493042, "std": 0.06678663939237595, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.4.4.ff.0.0.weight": { "min": -0.4009625017642975, "max": 0.5047707557678223, "mean": -3.825509702437557e-05, "std": 0.04113015532493591, "sparsity": 0.0, "shape": [ 4096, 1024 ] }, "transformer.layers.4.4.ff.0.0.bias": { "min": -0.12967447936534882, "max": 0.026864072307944298, "mean": -0.03057170659303665, "std": 0.021967768669128418, "sparsity": 0.0, "shape": [ 4096 ] }, "transformer.layers.4.4.ff.2.weight": { "min": -0.4517863094806671, "max": 0.4363614320755005, "mean": 7.544152322225273e-05, "std": 0.03489035367965698, "sparsity": 0.0, "shape": [ 1024, 4096 ] }, "transformer.layers.4.4.ff.2.bias": { "min": -0.2692056894302368, "max": 0.07339853048324585, "mean": -0.0010960557265207171, "std": 0.023164359852671623, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.5.1.g": { "min": 0.2873815894126892, "max": 0.6924071311950684, "mean": 0.5248355865478516, "std": 0.048200905323028564, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.5.2.to_q.weight": { "min": -0.22408804297447205, "max": 0.22555872797966003, "mean": 1.55975158122601e-05, "std": 0.038948412984609604, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.5.2.to_q.bias": { "min": -0.13717913627624512, "max": 0.10996447503566742, "mean": 0.00024089610087685287, "std": 0.02930767834186554, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.5.2.to_k.weight": { "min": -0.37717288732528687, "max": 0.43975257873535156, "mean": -9.77939271251671e-06, "std": 0.03928566351532936, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.5.2.to_k.bias": { "min": -3.868288516998291, "max": 5.028470516204834, "mean": 0.009761041030287743, "std": 0.8478302955627441, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.5.2.to_v.weight": { "min": -0.22423577308654785, "max": 0.221679225564003, "mean": -3.3901324059115723e-07, "std": 0.034409064799547195, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.5.2.to_v.bias": { "min": -0.0438535250723362, "max": 0.03604500740766525, "mean": -0.00025803165044635534, "std": 0.0120812077075243, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.5.2.to_out.0.weight": { "min": -0.2146783322095871, "max": 0.1904102861881256, "mean": -1.7072843547794037e-05, "std": 0.03153547644615173, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.5.2.to_out.0.bias": { "min": -0.18190543353557587, "max": 0.12149464339017868, "mean": -0.0023945681750774384, "std": 0.04129800572991371, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.5.3.g": { "min": 0.4226498305797577, "max": 0.9518083333969116, "mean": 0.6629198789596558, "std": 0.057358019053936005, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.5.4.ff.0.0.weight": { "min": -0.372251033782959, "max": 0.47781607508659363, "mean": -8.197914576157928e-05, "std": 0.040889132767915726, "sparsity": 0.0, "shape": [ 4096, 1024 ] }, "transformer.layers.5.4.ff.0.0.bias": { "min": -0.20997951924800873, "max": 0.027235740795731544, "mean": -0.030272582545876503, "std": 0.021444976329803467, "sparsity": 0.0, "shape": [ 4096 ] }, "transformer.layers.5.4.ff.2.weight": { "min": -0.34334975481033325, "max": 0.7389779686927795, "mean": 8.186099876184016e-05, "std": 0.034765809774398804, "sparsity": 0.0, "shape": [ 1024, 4096 ] }, "transformer.layers.5.4.ff.2.bias": { "min": -0.2415534406900406, "max": 0.050704218447208405, "mean": -0.001192720839753747, "std": 0.02049700915813446, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.6.1.g": { "min": 0.3061361312866211, "max": 0.6592679023742676, "mean": 0.5253557562828064, "std": 0.04659049212932587, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.6.2.to_q.weight": { "min": -0.3061867356300354, "max": 0.2188880741596222, "mean": 7.013476715655997e-05, "std": 0.03949468210339546, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.6.2.to_q.bias": { "min": -0.15020529925823212, "max": 0.13198836147785187, "mean": 0.00033842536504380405, "std": 0.030562784522771835, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.6.2.to_k.weight": { "min": -0.25926315784454346, "max": 0.20377042889595032, "mean": 3.10853029077407e-05, "std": 0.039484549313783646, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.6.2.to_k.bias": { "min": -2.3498988151550293, "max": 2.389754056930542, "mean": -0.02631671540439129, "std": 0.4510843753814697, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.6.2.to_v.weight": { "min": -0.19007518887519836, "max": 0.2122075855731964, "mean": 3.708741132868454e-05, "std": 0.03479320555925369, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.6.2.to_v.bias": { "min": -0.03199063614010811, "max": 0.03580143302679062, "mean": -0.00019849740783683956, "std": 0.012292149476706982, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.6.2.to_out.0.weight": { "min": -0.19011414051055908, "max": 0.17155633866786957, "mean": -6.832154031144455e-05, "std": 0.0321698896586895, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.6.2.to_out.0.bias": { "min": -0.14033056795597076, "max": 0.13829410076141357, "mean": -0.0025126286782324314, "std": 0.05131656676530838, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.6.3.g": { "min": 0.4672001600265503, "max": 0.9642724394798279, "mean": 0.6692001819610596, "std": 0.05353807285428047, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.6.4.ff.0.0.weight": { "min": -0.32512417435646057, "max": 0.3099176585674286, "mean": -8.536699169781059e-07, "std": 0.04094506427645683, "sparsity": 0.0, "shape": [ 4096, 1024 ] }, "transformer.layers.6.4.ff.0.0.bias": { "min": -0.12580342590808868, "max": 0.025558948516845703, "mean": -0.030726371333003044, "std": 0.019892578944563866, "sparsity": 0.0, "shape": [ 4096 ] }, "transformer.layers.6.4.ff.2.weight": { "min": -0.44301649928092957, "max": 0.448657363653183, "mean": 9.49525274336338e-05, "std": 0.03511860594153404, "sparsity": 0.0, "shape": [ 1024, 4096 ] }, "transformer.layers.6.4.ff.2.bias": { "min": -0.22610187530517578, "max": 0.0521467961370945, "mean": -0.0011865891283378005, "std": 0.018514476716518402, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.7.1.g": { "min": 0.3391834497451782, "max": 0.7460214495658875, "mean": 0.5588462352752686, "std": 0.04179359972476959, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.7.2.to_q.weight": { "min": -0.2743752598762512, "max": 0.27987486124038696, "mean": 2.0352064893813804e-05, "std": 0.04105662927031517, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.7.2.to_q.bias": { "min": -0.13770411908626556, "max": 0.14076648652553558, "mean": 0.0004916964680887759, "std": 0.026698192581534386, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.7.2.to_k.weight": { "min": -0.4935597777366638, "max": 0.3583414554595947, "mean": 8.887881995178759e-05, "std": 0.04069438576698303, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.7.2.to_k.bias": { "min": -2.311286687850952, "max": 1.7559641599655151, "mean": -0.02118358016014099, "std": 0.5012499094009399, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.7.2.to_v.weight": { "min": -0.2191997468471527, "max": 0.19883301854133606, "mean": -4.048732444061898e-05, "std": 0.03423238918185234, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.7.2.to_v.bias": { "min": -0.041594695299863815, "max": 0.039164409041404724, "mean": -0.00013954236055724323, "std": 0.012892705388367176, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.7.2.to_out.0.weight": { "min": -0.17905071377754211, "max": 0.18448761105537415, "mean": 4.79043010273017e-05, "std": 0.03155573084950447, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.7.2.to_out.0.bias": { "min": -0.1810525357723236, "max": 0.18478283286094666, "mean": -0.0022157104685902596, "std": 0.054884668439626694, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.7.3.g": { "min": 0.47422513365745544, "max": 1.034525752067566, "mean": 0.6455625891685486, "std": 0.05127067118883133, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.7.4.ff.0.0.weight": { "min": -0.2727859616279602, "max": 0.31039154529571533, "mean": 0.00011223299225093797, "std": 0.04068140313029289, "sparsity": 0.0, "shape": [ 4096, 1024 ] }, "transformer.layers.7.4.ff.0.0.bias": { "min": -0.10606198012828827, "max": 0.026645641773939133, "mean": -0.02954702451825142, "std": 0.01799139380455017, "sparsity": 0.0, "shape": [ 4096 ] }, "transformer.layers.7.4.ff.2.weight": { "min": -0.34065425395965576, "max": 0.33199548721313477, "mean": 5.238396261120215e-05, "std": 0.034412581473588943, "sparsity": 0.0, "shape": [ 1024, 4096 ] }, "transformer.layers.7.4.ff.2.bias": { "min": -0.18290212750434875, "max": 0.042540330439805984, "mean": -0.001063595642335713, "std": 0.017244886606931686, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.8.1.g": { "min": 0.32540637254714966, "max": 0.6927012801170349, "mean": 0.511530876159668, "std": 0.037588104605674744, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.8.2.to_q.weight": { "min": -0.23500792682170868, "max": 0.22661413252353668, "mean": -3.6375215131556615e-05, "std": 0.039175912737846375, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.8.2.to_q.bias": { "min": -0.11630432307720184, "max": 0.1327952891588211, "mean": 0.00015614689618814737, "std": 0.02927626110613346, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.8.2.to_k.weight": { "min": -0.35499081015586853, "max": 0.28717586398124695, "mean": 7.152914804464672e-06, "std": 0.03924452140927315, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.8.2.to_k.bias": { "min": -4.1564154624938965, "max": 3.564419746398926, "mean": -0.011666063219308853, "std": 0.6851950883865356, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.8.2.to_v.weight": { "min": -0.21194273233413696, "max": 0.21046526730060577, "mean": 3.472749813226983e-05, "std": 0.0344846174120903, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.8.2.to_v.bias": { "min": -0.03606359288096428, "max": 0.0485043041408062, "mean": 0.0007934037130326033, "std": 0.01287116389721632, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.8.2.to_out.0.weight": { "min": -0.21187099814414978, "max": 0.19423909485340118, "mean": -1.3818132629239699e-06, "std": 0.03169572353363037, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.8.2.to_out.0.bias": { "min": -0.1876450628042221, "max": 0.1781487911939621, "mean": -0.0028378514107316732, "std": 0.05868522822856903, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.8.3.g": { "min": 0.4746300280094147, "max": 1.0532299280166626, "mean": 0.6519026756286621, "std": 0.0511440671980381, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.8.4.ff.0.0.weight": { "min": -0.24888233840465546, "max": 0.329919695854187, "mean": 0.00018074009858537465, "std": 0.04056980833411217, "sparsity": 0.0, "shape": [ 4096, 1024 ] }, "transformer.layers.8.4.ff.0.0.bias": { "min": -0.1257043331861496, "max": 0.024808209389448166, "mean": -0.03052573651075363, "std": 0.01766115613281727, "sparsity": 0.0, "shape": [ 4096 ] }, "transformer.layers.8.4.ff.2.weight": { "min": -0.4241631031036377, "max": 0.48552921414375305, "mean": -1.5207942851702683e-06, "std": 0.03539673238992691, "sparsity": 0.0, "shape": [ 1024, 4096 ] }, "transformer.layers.8.4.ff.2.bias": { "min": -0.15242178738117218, "max": 0.0436730720102787, "mean": 4.8590598453301936e-05, "std": 0.01490879151970148, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.9.1.g": { "min": 0.3154313564300537, "max": 0.68807452917099, "mean": 0.5530612468719482, "std": 0.041024595499038696, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.9.2.to_q.weight": { "min": -0.20784315466880798, "max": 0.22137802839279175, "mean": 3.199603088432923e-05, "std": 0.038299061357975006, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.9.2.to_q.bias": { "min": -0.13870200514793396, "max": 0.11339821666479111, "mean": 2.9128044843673706e-05, "std": 0.025894545018672943, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.9.2.to_k.weight": { "min": -0.4055723249912262, "max": 0.37375950813293457, "mean": 2.5988052584580146e-05, "std": 0.038179732859134674, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.9.2.to_k.bias": { "min": -3.7928740978240967, "max": 2.885420560836792, "mean": 0.0012225983664393425, "std": 0.5186418294906616, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.9.2.to_v.weight": { "min": -0.20435325801372528, "max": 0.1985306441783905, "mean": 2.9608720069518313e-05, "std": 0.03429684415459633, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.9.2.to_v.bias": { "min": -0.051018889993429184, "max": 0.040129613131284714, "mean": -0.00042048803879879415, "std": 0.013424505479633808, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.9.2.to_out.0.weight": { "min": -0.19798687100410461, "max": 0.20356523990631104, "mean": -1.2490939298004378e-05, "std": 0.03180477395653725, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.9.2.to_out.0.bias": { "min": -0.1941322237253189, "max": 0.19617649912834167, "mean": -0.002969961380586028, "std": 0.06259642541408539, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.9.3.g": { "min": 0.3487941026687622, "max": 1.0952281951904297, "mean": 0.6676215529441833, "std": 0.05664284899830818, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.9.4.ff.0.0.weight": { "min": -0.22712087631225586, "max": 0.25315943360328674, "mean": 0.00035851544816978276, "std": 0.04075949266552925, "sparsity": 0.0, "shape": [ 4096, 1024 ] }, "transformer.layers.9.4.ff.0.0.bias": { "min": -0.09184330701828003, "max": 0.04372864216566086, "mean": -0.030109990388154984, "std": 0.017667723819613457, "sparsity": 0.0, "shape": [ 4096 ] }, "transformer.layers.9.4.ff.2.weight": { "min": -0.35518717765808105, "max": 0.30635109543800354, "mean": -4.3967633246211335e-05, "std": 0.037122078239917755, "sparsity": 0.0, "shape": [ 1024, 4096 ] }, "transformer.layers.9.4.ff.2.bias": { "min": -0.16265232861042023, "max": 0.06366349011659622, "mean": -8.268894453067333e-05, "std": 0.019441038370132446, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.10.1.g": { "min": 0.3488224744796753, "max": 0.7298842668533325, "mean": 0.5426357388496399, "std": 0.039679452776908875, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.10.2.to_q.weight": { "min": -0.22033143043518066, "max": 0.22433431446552277, "mean": -1.1077730960096233e-05, "std": 0.03923030197620392, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.10.2.to_q.bias": { "min": -0.11923559010028839, "max": 0.1716114580631256, "mean": 0.00028718815883621573, "std": 0.025185901671648026, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.10.2.to_k.weight": { "min": -0.2481980323791504, "max": 0.3025566339492798, "mean": -3.676430060295388e-05, "std": 0.0389297790825367, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.10.2.to_k.bias": { "min": -3.5254225730895996, "max": 3.736085891723633, "mean": 0.01585158333182335, "std": 0.7859480977058411, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.10.2.to_v.weight": { "min": -0.21972878277301788, "max": 0.23833929002285004, "mean": -1.325977427768521e-05, "std": 0.03630264848470688, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.10.2.to_v.bias": { "min": -0.04748326912522316, "max": 0.051650550216436386, "mean": 0.0004778398433700204, "std": 0.01352317538112402, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.10.2.to_out.0.weight": { "min": -0.21533912420272827, "max": 0.21868844330310822, "mean": 5.647652506013401e-05, "std": 0.03361491113901138, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.10.2.to_out.0.bias": { "min": -0.21255744993686676, "max": 0.23268213868141174, "mean": -0.005099742207676172, "std": 0.06193498894572258, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.10.3.g": { "min": 0.36217188835144043, "max": 1.112847089767456, "mean": 0.69975745677948, "std": 0.05501763895153999, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.10.4.ff.0.0.weight": { "min": -0.23635224997997284, "max": 0.24658624827861786, "mean": 0.00046343228314071894, "std": 0.041268426924943924, "sparsity": 0.0, "shape": [ 4096, 1024 ] }, "transformer.layers.10.4.ff.0.0.bias": { "min": -0.09862525016069412, "max": 0.06863635033369064, "mean": -0.03145936504006386, "std": 0.018182674422860146, "sparsity": 0.0, "shape": [ 4096 ] }, "transformer.layers.10.4.ff.2.weight": { "min": -0.30422019958496094, "max": 0.3540525734424591, "mean": -8.221832831623033e-05, "std": 0.04027421772480011, "sparsity": 0.0, "shape": [ 1024, 4096 ] }, "transformer.layers.10.4.ff.2.bias": { "min": -0.1533002257347107, "max": 0.150687575340271, "mean": 0.00025470374384894967, "std": 0.023078717291355133, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.11.1.g": { "min": 0.9982896447181702, "max": 1.017301082611084, "mean": 1.0001298189163208, "std": 0.0026745295617729425, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.11.2.to_q.weight": { "min": -0.031271953135728836, "max": 0.03127208724617958, "mean": -1.929010068124626e-05, "std": 0.01804104819893837, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.11.2.to_q.bias": { "min": -0.03122810088098049, "max": 0.030984606593847275, "mean": -0.0010841733310371637, "std": 0.0179507527500391, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.11.2.to_k.weight": { "min": -0.03126660734415054, "max": 0.03127255663275719, "mean": 3.5378593565837946e-06, "std": 0.018041487783193588, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.11.2.to_k.bias": { "min": -0.031172683462500572, "max": 0.031167395412921906, "mean": 0.0003339074901305139, "std": 0.01806284487247467, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.11.2.to_v.weight": { "min": 0.0, "max": 0.0, "mean": 0.0, "std": 0.0, "sparsity": 1.0, "shape": [ 1024, 1024 ] }, "transformer.layers.11.2.to_v.bias": { "min": 0.0, "max": 0.0, "mean": 0.0, "std": 0.0, "sparsity": 1.0, "shape": [ 1024 ] }, "transformer.layers.11.2.to_out.0.weight": { "min": 0.0, "max": 0.0, "mean": 0.0, "std": 0.0, "sparsity": 1.0, "shape": [ 1024, 1024 ] }, "transformer.layers.11.2.to_out.0.bias": { "min": -0.0006182725192047656, "max": 0.0004164598067291081, "mean": 1.3710750863538124e-06, "std": 0.0001378587185172364, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.11.3.g": { "min": 0.9979904890060425, "max": 1.0161197185516357, "mean": 1.0013301372528076, "std": 0.004817315377295017, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.11.4.ff.0.0.weight": { "min": -0.032745394855737686, "max": 0.03283839672803879, "mean": -6.682760158582823e-06, "std": 0.018042659386992455, "sparsity": 0.0, "shape": [ 4096, 1024 ] }, "transformer.layers.11.4.ff.0.0.bias": { "min": -0.03276297450065613, "max": 0.0325884111225605, "mean": -0.00013115988986101002, "std": 0.017956366762518883, "sparsity": 0.0, "shape": [ 4096 ] }, "transformer.layers.11.4.ff.2.weight": { "min": -0.0011839725775644183, "max": 0.0011610303772613406, "mean": 3.635812220181833e-07, "std": 0.00021423342695925385, "sparsity": 0.0, "shape": [ 1024, 4096 ] }, "transformer.layers.11.4.ff.2.bias": { "min": -0.0005281989579088986, "max": 0.0004011568380519748, "mean": 2.2640601855528075e-06, "std": 0.00012689748837146908, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.12.1.g": { "min": 0.383169025182724, "max": 0.725769579410553, "mean": 0.5810222625732422, "std": 0.039563409984111786, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.12.2.to_q.weight": { "min": -0.23967966437339783, "max": 0.19745716452598572, "mean": 2.6129977413802408e-05, "std": 0.0374654158949852, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.12.2.to_q.bias": { "min": -0.1195952445268631, "max": 0.16743028163909912, "mean": 0.0009849121561273932, "std": 0.02763625606894493, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.12.2.to_k.weight": { "min": -0.24753768742084503, "max": 0.502853274345398, "mean": -4.9970258260145783e-05, "std": 0.0376228392124176, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.12.2.to_k.bias": { "min": -3.9648470878601074, "max": 3.7909820079803467, "mean": -0.0036168191581964493, "std": 0.6834573745727539, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.12.2.to_v.weight": { "min": -0.22818903625011444, "max": 0.25305306911468506, "mean": -1.1425543561927043e-05, "std": 0.037434399127960205, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.12.2.to_v.bias": { "min": -0.07215739786624908, "max": 0.08118511736392975, "mean": -0.0005145666655153036, "std": 0.015683691948652267, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.12.2.to_out.0.weight": { "min": -0.2285011112689972, "max": 0.25927454233169556, "mean": -2.8810776711907238e-05, "std": 0.03542128577828407, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.12.2.to_out.0.bias": { "min": -0.20174317061901093, "max": 0.21631476283073425, "mean": -0.005539278965443373, "std": 0.06842140108346939, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.12.3.g": { "min": 0.4053976237773895, "max": 1.1997506618499756, "mean": 0.7383711338043213, "std": 0.05650194734334946, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.12.4.ff.0.0.weight": { "min": -0.2226068526506424, "max": 0.24658025801181793, "mean": 0.0005210487288422883, "std": 0.04133579134941101, "sparsity": 0.0, "shape": [ 4096, 1024 ] }, "transformer.layers.12.4.ff.0.0.bias": { "min": -0.10394058376550674, "max": 0.02423257753252983, "mean": -0.032700441777706146, "std": 0.018963389098644257, "sparsity": 0.0, "shape": [ 4096 ] }, "transformer.layers.12.4.ff.2.weight": { "min": -0.452515184879303, "max": 0.4254130423069, "mean": -0.0004341741732787341, "std": 0.04689616709947586, "sparsity": 0.0, "shape": [ 1024, 4096 ] }, "transformer.layers.12.4.ff.2.bias": { "min": -0.25287455320358276, "max": 0.4728158116340637, "mean": 0.003204880515113473, "std": 0.04463134706020355, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.13.0.weight": { "min": -0.31750747561454773, "max": 0.333750456571579, "mean": -2.5235824068658985e-05, "std": 0.021287381649017334, "sparsity": 0.0, "shape": [ 1024, 2048 ] }, "transformer.layers.13.1.g": { "min": 0.3244800865650177, "max": 0.6913307905197144, "mean": 0.5712176561355591, "std": 0.045165594667196274, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.13.2.to_q.weight": { "min": -0.16547071933746338, "max": 0.1755398064851761, "mean": -4.8899608373176306e-05, "std": 0.033180754631757736, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.13.2.to_q.bias": { "min": -0.18801826238632202, "max": 0.1438588947057724, "mean": 4.4942658860236406e-05, "std": 0.029767248779535294, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.13.2.to_k.weight": { "min": -0.38313359022140503, "max": 0.24818716943264008, "mean": -9.953633707482368e-06, "std": 0.03276177868247032, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.13.2.to_k.bias": { "min": -3.6768205165863037, "max": 3.3089771270751953, "mean": -0.014381470158696175, "std": 0.9868160486221313, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.13.2.to_v.weight": { "min": -0.23584222793579102, "max": 0.24873286485671997, "mean": -1.8046124750981107e-05, "std": 0.0416971780359745, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.13.2.to_v.bias": { "min": -0.07315867394208908, "max": 0.15554027259349823, "mean": 0.0006676731863990426, "std": 0.02520027756690979, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.13.2.to_out.0.weight": { "min": -0.2670559585094452, "max": 0.24887487292289734, "mean": -1.537521166028455e-05, "std": 0.04013797268271446, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.13.2.to_out.0.bias": { "min": -0.1908693015575409, "max": 0.1960526406764984, "mean": -0.001238689525052905, "std": 0.06672189384698868, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.13.3.g": { "min": 0.3290148973464966, "max": 1.0089884996414185, "mean": 0.719682514667511, "std": 0.053548477590084076, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.13.4.ff.0.0.weight": { "min": -0.23323918879032135, "max": 0.2469726949930191, "mean": 0.00018311971507500857, "std": 0.04089980572462082, "sparsity": 0.0, "shape": [ 4096, 1024 ] }, "transformer.layers.13.4.ff.0.0.bias": { "min": -0.11503507941961288, "max": 0.019024236127734184, "mean": -0.04251422733068466, "std": 0.018931886181235313, "sparsity": 0.0, "shape": [ 4096 ] }, "transformer.layers.13.4.ff.2.weight": { "min": -0.3927544355392456, "max": 0.4104294776916504, "mean": -2.164382931368891e-05, "std": 0.04853343218564987, "sparsity": 0.0, "shape": [ 1024, 4096 ] }, "transformer.layers.13.4.ff.2.bias": { "min": -0.6971645355224609, "max": 0.414955198764801, "mean": 0.0008486253209412098, "std": 0.060451194643974304, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.14.0.weight": { "min": -0.001029345323331654, "max": 1.0005033016204834, "mean": 0.00048820505617186427, "std": 0.022088995203375816, "sparsity": 0.0, "shape": [ 1024, 2048 ] }, "transformer.layers.14.1.g": { "min": 0.99776691198349, "max": 1.0153907537460327, "mean": 0.9997058510780334, "std": 0.0012300637317821383, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.14.2.to_q.weight": { "min": -0.031274545937776566, "max": 0.03127707168459892, "mean": -2.1027797629358247e-05, "std": 0.018032420426607132, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.14.2.to_q.bias": { "min": -0.031217729672789574, "max": 0.031233638525009155, "mean": -0.0006770637119188905, "std": 0.017827108502388, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.14.2.to_k.weight": { "min": -0.03128187730908394, "max": 0.031268589198589325, "mean": -8.834878826746717e-06, "std": 0.018031446263194084, "sparsity": 9.5367431640625e-07, "shape": [ 1024, 1024 ] }, "transformer.layers.14.2.to_k.bias": { "min": -0.031228115782141685, "max": 0.03124588541686535, "mean": -0.0007299837889149785, "std": 0.017942119389772415, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.14.2.to_v.weight": { "min": 0.0, "max": 0.0, "mean": 0.0, "std": 0.0, "sparsity": 1.0, "shape": [ 1024, 1024 ] }, "transformer.layers.14.2.to_v.bias": { "min": 0.0, "max": 0.0, "mean": 0.0, "std": 0.0, "sparsity": 1.0, "shape": [ 1024 ] }, "transformer.layers.14.2.to_out.0.weight": { "min": 0.0, "max": 0.0, "mean": 0.0, "std": 0.0, "sparsity": 1.0, "shape": [ 1024, 1024 ] }, "transformer.layers.14.2.to_out.0.bias": { "min": -0.0004204909782856703, "max": 0.00033413738128729165, "mean": -3.152099679937237e-06, "std": 0.0001164414279628545, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.14.3.g": { "min": 0.997612476348877, "max": 1.018494963645935, "mean": 1.0012025833129883, "std": 0.0055990261025726795, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.14.4.ff.0.0.weight": { "min": -0.032435424625873566, "max": 0.032380323857069016, "mean": -1.7302188553003361e-06, "std": 0.018027864396572113, "sparsity": 0.0, "shape": [ 4096, 1024 ] }, "transformer.layers.14.4.ff.0.0.bias": { "min": -0.032131362706422806, "max": 0.031162748113274574, "mean": -0.00037396998959593475, "std": 0.01804373785853386, "sparsity": 0.0, "shape": [ 4096 ] }, "transformer.layers.14.4.ff.2.weight": { "min": -0.0012890547513961792, "max": 0.001122222631238401, "mean": -8.950937626650557e-07, "std": 0.00020965519070159644, "sparsity": 0.0, "shape": [ 1024, 4096 ] }, "transformer.layers.14.4.ff.2.bias": { "min": -0.00034396781120449305, "max": 0.00029873003950342536, "mean": -3.7820796023879666e-06, "std": 0.000104848513728939, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.15.0.weight": { "min": -0.2348298579454422, "max": 0.27300530672073364, "mean": 6.816113909735577e-06, "std": 0.018809327855706215, "sparsity": 0.0, "shape": [ 1024, 2048 ] }, "transformer.layers.15.1.g": { "min": 0.3214486837387085, "max": 0.7001691460609436, "mean": 0.5819005370140076, "std": 0.04646027460694313, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.15.2.to_q.weight": { "min": -0.18254612386226654, "max": 0.19860517978668213, "mean": -1.1607673513935879e-05, "std": 0.03318353369832039, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.15.2.to_q.bias": { "min": -0.1615392416715622, "max": 0.13018541038036346, "mean": -0.001078265719115734, "std": 0.03421453759074211, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.15.2.to_k.weight": { "min": -0.33349576592445374, "max": 0.31233182549476624, "mean": -1.0118232239619829e-05, "std": 0.032234255224466324, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.15.2.to_k.bias": { "min": -7.8480448722839355, "max": 8.8128080368042, "mean": 0.09380069375038147, "std": 1.6259617805480957, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.15.2.to_v.weight": { "min": -0.23474065959453583, "max": 0.24273009598255157, "mean": 4.155310307396576e-05, "std": 0.04085606709122658, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.15.2.to_v.bias": { "min": -0.07642843574285507, "max": 0.06617211550474167, "mean": 0.0004827451193705201, "std": 0.01944047026336193, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.15.2.to_out.0.weight": { "min": -0.24762944877147675, "max": 0.2358739972114563, "mean": -3.232937160646543e-06, "std": 0.03943068906664848, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.15.2.to_out.0.bias": { "min": -0.16411840915679932, "max": 0.1619885265827179, "mean": 0.001625007251277566, "std": 0.06529368460178375, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.15.3.g": { "min": 0.5569814443588257, "max": 0.9541290402412415, "mean": 0.7133999466896057, "std": 0.04144103080034256, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.15.4.ff.0.0.weight": { "min": -0.22980599105358124, "max": 0.2567155957221985, "mean": -4.5827197027392685e-05, "std": 0.04057452455163002, "sparsity": 0.0, "shape": [ 4096, 1024 ] }, "transformer.layers.15.4.ff.0.0.bias": { "min": -0.13575804233551025, "max": 0.02213761769235134, "mean": -0.04138356074690819, "std": 0.01845938339829445, "sparsity": 0.0, "shape": [ 4096 ] }, "transformer.layers.15.4.ff.2.weight": { "min": -0.4245927333831787, "max": 0.39355969429016113, "mean": -4.580877430271357e-06, "std": 0.04778376594185829, "sparsity": 0.0, "shape": [ 1024, 4096 ] }, "transformer.layers.15.4.ff.2.bias": { "min": -0.6110193133354187, "max": 0.6553415656089783, "mean": 0.001590792671777308, "std": 0.056976497173309326, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.16.0.weight": { "min": -0.251875638961792, "max": 0.3209821879863739, "mean": -6.120833859313279e-06, "std": 0.019612718373537064, "sparsity": 0.0, "shape": [ 1024, 2048 ] }, "transformer.layers.16.1.g": { "min": 0.35964423418045044, "max": 0.6887573599815369, "mean": 0.5708860754966736, "std": 0.04330369085073471, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.16.2.to_q.weight": { "min": -0.2213190197944641, "max": 0.17759515345096588, "mean": -3.466910129645839e-05, "std": 0.03429858386516571, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.16.2.to_q.bias": { "min": -0.16418921947479248, "max": 0.23438312113285065, "mean": 0.0003640234936028719, "std": 0.03290766850113869, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.16.2.to_k.weight": { "min": -0.2654394805431366, "max": 0.24140575528144836, "mean": -5.2719900850206614e-05, "std": 0.03389739617705345, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.16.2.to_k.bias": { "min": -4.882589817047119, "max": 5.12019157409668, "mean": 0.04409287869930267, "std": 1.233181118965149, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.16.2.to_v.weight": { "min": -0.2474043071269989, "max": 0.2517080307006836, "mean": 7.239622209453955e-05, "std": 0.0439867228269577, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.16.2.to_v.bias": { "min": -0.0629691556096077, "max": 0.054786957800388336, "mean": 0.0006426851614378393, "std": 0.017202140763401985, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.16.2.to_out.0.weight": { "min": -0.28832921385765076, "max": 0.2730186879634857, "mean": -5.011680332245305e-05, "std": 0.04298482462763786, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.16.2.to_out.0.bias": { "min": -0.16195480525493622, "max": 0.1713690608739853, "mean": -0.002885536290705204, "std": 0.05930813401937485, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.16.3.g": { "min": 0.5195947885513306, "max": 0.9433215260505676, "mean": 0.713985800743103, "std": 0.0396861806511879, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.16.4.ff.0.0.weight": { "min": -0.23872706294059753, "max": 0.24947769939899445, "mean": 0.000464944401755929, "std": 0.04045351594686508, "sparsity": 0.0, "shape": [ 4096, 1024 ] }, "transformer.layers.16.4.ff.0.0.bias": { "min": -0.14595000445842743, "max": 0.041102174669504166, "mean": -0.03972803056240082, "std": 0.020616797730326653, "sparsity": 0.0, "shape": [ 4096 ] }, "transformer.layers.16.4.ff.2.weight": { "min": -0.5366718769073486, "max": 0.5868415236473083, "mean": 5.812449671793729e-06, "std": 0.04885939508676529, "sparsity": 0.0, "shape": [ 1024, 4096 ] }, "transformer.layers.16.4.ff.2.bias": { "min": -0.5220040678977966, "max": 0.4962327182292938, "mean": 0.0023680159356445074, "std": 0.05358637124300003, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.17.0.weight": { "min": -0.2740743160247803, "max": 0.31590986251831055, "mean": 1.968129254237283e-06, "std": 0.02004937082529068, "sparsity": 0.0, "shape": [ 1024, 2048 ] }, "transformer.layers.17.1.g": { "min": 0.36616218090057373, "max": 0.718187689781189, "mean": 0.5934113264083862, "std": 0.04643949121236801, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.17.2.to_q.weight": { "min": -0.21206998825073242, "max": 0.20034025609493256, "mean": 3.0636681913165376e-05, "std": 0.03486590087413788, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.17.2.to_q.bias": { "min": -0.18825751543045044, "max": 0.20496514439582825, "mean": 0.000955467636231333, "std": 0.03160287067294121, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.17.2.to_k.weight": { "min": -0.2913488745689392, "max": 0.34160566329956055, "mean": -4.710702705779113e-05, "std": 0.03458679839968681, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.17.2.to_k.bias": { "min": -3.8994882106781006, "max": 3.406729221343994, "mean": 0.014544591307640076, "std": 0.8605263829231262, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.17.2.to_v.weight": { "min": -0.2257968783378601, "max": 0.2514858543872833, "mean": -3.6003511922899634e-06, "std": 0.042229436337947845, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.17.2.to_v.bias": { "min": -0.055651042610406876, "max": 0.04694758728146553, "mean": -1.666278694756329e-05, "std": 0.015861017629504204, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.17.2.to_out.0.weight": { "min": -0.2935076653957367, "max": 0.2909187078475952, "mean": -7.359203209489351e-06, "std": 0.04194429889321327, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.17.2.to_out.0.bias": { "min": -0.12573029100894928, "max": 0.2607214152812958, "mean": -0.003240898484364152, "std": 0.05319065600633621, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.17.3.g": { "min": 0.45657190680503845, "max": 0.8538610339164734, "mean": 0.7059471011161804, "std": 0.03630220517516136, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.17.4.ff.0.0.weight": { "min": -0.5123063325881958, "max": 0.3483346104621887, "mean": 0.00034276110818609595, "std": 0.04019864276051521, "sparsity": 0.0, "shape": [ 4096, 1024 ] }, "transformer.layers.17.4.ff.0.0.bias": { "min": -0.18701618909835815, "max": 0.03957710787653923, "mean": -0.03942158818244934, "std": 0.021421542391180992, "sparsity": 0.0, "shape": [ 4096 ] }, "transformer.layers.17.4.ff.2.weight": { "min": -0.5481660962104797, "max": 0.5603045225143433, "mean": -7.152351463446394e-05, "std": 0.050734106451272964, "sparsity": 0.0, "shape": [ 1024, 4096 ] }, "transformer.layers.17.4.ff.2.bias": { "min": -0.5146781802177429, "max": 0.6680049300193787, "mean": 0.002443398116156459, "std": 0.04963434487581253, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.18.0.weight": { "min": -0.3329000473022461, "max": 0.2665855884552002, "mean": 3.3853375498438254e-06, "std": 0.01938658207654953, "sparsity": 0.0, "shape": [ 1024, 2048 ] }, "transformer.layers.18.1.g": { "min": 0.32180243730545044, "max": 0.7734456062316895, "mean": 0.6512116193771362, "std": 0.04565456882119179, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.18.2.to_q.weight": { "min": -0.2506096363067627, "max": 0.2205670177936554, "mean": -2.243723429273814e-06, "std": 0.0365004725754261, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.18.2.to_q.bias": { "min": -0.32875651121139526, "max": 0.28859665989875793, "mean": -0.0006945514469407499, "std": 0.03869060054421425, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.18.2.to_k.weight": { "min": -0.31226253509521484, "max": 0.3726266324520111, "mean": 6.49260327918455e-05, "std": 0.03624095767736435, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.18.2.to_k.bias": { "min": -4.75054407119751, "max": 5.848582744598389, "mean": 0.0380375012755394, "std": 1.4184556007385254, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.18.2.to_v.weight": { "min": -0.22316591441631317, "max": 0.2069820612668991, "mean": -7.529938011430204e-05, "std": 0.042484965175390244, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.18.2.to_v.bias": { "min": -0.07815916836261749, "max": 0.051765959709882736, "mean": -0.0009295076015405357, "std": 0.016425304114818573, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.18.2.to_out.0.weight": { "min": -0.3312581181526184, "max": 0.3296850621700287, "mean": -4.723461188405054e-06, "std": 0.04279135540127754, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.18.2.to_out.0.bias": { "min": -0.2866402864456177, "max": 0.11266554147005081, "mean": -0.0012074881233274937, "std": 0.04703830927610397, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.18.3.g": { "min": 0.4860897958278656, "max": 0.8950455784797668, "mean": 0.7378093004226685, "std": 0.039171766489744186, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.18.4.ff.0.0.weight": { "min": -0.3630240857601166, "max": 0.2759678065776825, "mean": 5.1290608098497614e-05, "std": 0.04064415767788887, "sparsity": 0.0, "shape": [ 4096, 1024 ] }, "transformer.layers.18.4.ff.0.0.bias": { "min": -0.2490600198507309, "max": 0.04639717563986778, "mean": -0.03930266201496124, "std": 0.023369962349534035, "sparsity": 0.0, "shape": [ 4096 ] }, "transformer.layers.18.4.ff.2.weight": { "min": -0.6307172775268555, "max": 0.6014147996902466, "mean": -6.16723409621045e-05, "std": 0.05311626195907593, "sparsity": 0.0, "shape": [ 1024, 4096 ] }, "transformer.layers.18.4.ff.2.bias": { "min": -0.7142688035964966, "max": 0.267661988735199, "mean": 0.0009166492964141071, "std": 0.051358189433813095, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.19.0.weight": { "min": -0.3435579240322113, "max": 0.3038428723812103, "mean": 1.3023259270994458e-07, "std": 0.019134989008307457, "sparsity": 0.0, "shape": [ 1024, 2048 ] }, "transformer.layers.19.1.g": { "min": 0.3500676155090332, "max": 0.7897790670394897, "mean": 0.6390184760093689, "std": 0.04962107539176941, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.19.2.to_q.weight": { "min": -0.2066265493631363, "max": 0.20817363262176514, "mean": -5.989617056911811e-05, "std": 0.037695348262786865, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.19.2.to_q.bias": { "min": -0.2602774202823639, "max": 0.2698180377483368, "mean": -0.00039462913991883397, "std": 0.04474588483572006, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.19.2.to_k.weight": { "min": -0.3561350107192993, "max": 0.32447537779808044, "mean": -6.916588063177187e-06, "std": 0.03720375522971153, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.19.2.to_k.bias": { "min": -5.291650295257568, "max": 4.228523254394531, "mean": -0.02643691562116146, "std": 1.0099413394927979, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.19.2.to_v.weight": { "min": -0.2399577796459198, "max": 0.24472706019878387, "mean": -2.5193990950356238e-05, "std": 0.04320961609482765, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.19.2.to_v.bias": { "min": -0.06267981976270676, "max": 0.05705071985721588, "mean": 0.0003437635023146868, "std": 0.014168186113238335, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.19.2.to_out.0.weight": { "min": -0.4376278221607208, "max": 0.3739663064479828, "mean": 1.456045083614299e-05, "std": 0.04412108287215233, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.19.2.to_out.0.bias": { "min": -0.09702851623296738, "max": 0.17698785662651062, "mean": -0.0006597189931198955, "std": 0.03517333045601845, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.19.3.g": { "min": 0.4217059910297394, "max": 1.0791560411453247, "mean": 0.7486134767532349, "std": 0.04263925552368164, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.19.4.ff.0.0.weight": { "min": -0.26739904284477234, "max": 0.298541396856308, "mean": -7.951692532515153e-05, "std": 0.040804121643304825, "sparsity": 0.0, "shape": [ 4096, 1024 ] }, "transformer.layers.19.4.ff.0.0.bias": { "min": -0.18641552329063416, "max": 0.043663352727890015, "mean": -0.036861587315797806, "std": 0.0257096104323864, "sparsity": 0.0, "shape": [ 4096 ] }, "transformer.layers.19.4.ff.2.weight": { "min": -0.4583725333213806, "max": 0.4902479946613312, "mean": 4.34339017374441e-05, "std": 0.05420944094657898, "sparsity": 0.0, "shape": [ 1024, 4096 ] }, "transformer.layers.19.4.ff.2.bias": { "min": -0.2883600890636444, "max": 0.5551440119743347, "mean": -0.0008822724921628833, "std": 0.04795018211007118, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.20.0.weight": { "min": -0.2930268347263336, "max": 0.3230960965156555, "mean": 6.1333103076322e-06, "std": 0.01996854692697525, "sparsity": 0.0, "shape": [ 1024, 2048 ] }, "transformer.layers.20.1.g": { "min": 0.29084402322769165, "max": 0.768223226070404, "mean": 0.650917649269104, "std": 0.05231805518269539, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.20.2.to_q.weight": { "min": -0.24454748630523682, "max": 0.2624610364437103, "mean": -5.949783371761441e-06, "std": 0.039611514657735825, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.20.2.to_q.bias": { "min": -0.2689764201641083, "max": 0.20118767023086548, "mean": -0.000883190892636776, "std": 0.05189211666584015, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.20.2.to_k.weight": { "min": -0.27367931604385376, "max": 0.25521987676620483, "mean": 4.683277438743971e-06, "std": 0.038708530366420746, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.20.2.to_k.bias": { "min": -13.039263725280762, "max": 16.03864097595215, "mean": 0.03343699499964714, "std": 1.9974913597106934, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.20.2.to_v.weight": { "min": -0.2084328532218933, "max": 0.2273532599210739, "mean": -7.200734398793429e-05, "std": 0.040553417056798935, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.20.2.to_v.bias": { "min": -0.06970705837011337, "max": 0.06357143819332123, "mean": 0.00015784359129611403, "std": 0.014761138707399368, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.20.2.to_out.0.weight": { "min": -0.46569308638572693, "max": 0.3209618628025055, "mean": 1.970405901374761e-05, "std": 0.04058854654431343, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.20.2.to_out.0.bias": { "min": -0.06452719122171402, "max": 0.11591468751430511, "mean": 0.0011942506534978747, "std": 0.024729805067181587, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.20.3.g": { "min": 0.37459689378738403, "max": 0.9426000118255615, "mean": 0.7511058449745178, "std": 0.040696173906326294, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.20.4.ff.0.0.weight": { "min": -0.2817957103252411, "max": 0.27507483959198, "mean": -0.00016845125355757773, "std": 0.040994707494974136, "sparsity": 0.0, "shape": [ 4096, 1024 ] }, "transformer.layers.20.4.ff.0.0.bias": { "min": -0.19982005655765533, "max": 0.05116043612360954, "mean": -0.03206067159771919, "std": 0.025184709578752518, "sparsity": 0.0, "shape": [ 4096 ] }, "transformer.layers.20.4.ff.2.weight": { "min": -0.6629015207290649, "max": 0.5394555330276489, "mean": -4.886999522568658e-05, "std": 0.052846018224954605, "sparsity": 0.0, "shape": [ 1024, 4096 ] }, "transformer.layers.20.4.ff.2.bias": { "min": -0.1941312849521637, "max": 0.5856620669364929, "mean": -0.0005102052818983793, "std": 0.04117872565984726, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.21.0.weight": { "min": -0.41802144050598145, "max": 0.37218335270881653, "mean": 6.143730843177764e-06, "std": 0.021620716899633408, "sparsity": 0.0, "shape": [ 1024, 2048 ] }, "transformer.layers.21.1.g": { "min": 0.214231476187706, "max": 0.7551652193069458, "mean": 0.6496015787124634, "std": 0.05449988320469856, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.21.2.to_q.weight": { "min": -0.21102380752563477, "max": 0.19707706570625305, "mean": 4.027696923003532e-05, "std": 0.03946160152554512, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.21.2.to_q.bias": { "min": -0.3312985599040985, "max": 0.2609282433986664, "mean": -0.0032433252781629562, "std": 0.05640969052910805, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.21.2.to_k.weight": { "min": -0.20687410235404968, "max": 0.25594964623451233, "mean": 5.426290590548888e-05, "std": 0.038564227521419525, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.21.2.to_k.bias": { "min": -6.281450271606445, "max": 6.974554538726807, "mean": 0.04850253462791443, "std": 1.3900896310806274, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.21.2.to_v.weight": { "min": -0.2110043168067932, "max": 0.23172873258590698, "mean": -5.136051640874939e-06, "std": 0.04131242260336876, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.21.2.to_v.bias": { "min": -0.04407680407166481, "max": 0.03620957210659981, "mean": 5.837064236402512e-07, "std": 0.012804933823645115, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.21.2.to_out.0.weight": { "min": -0.3980613648891449, "max": 0.34518715739250183, "mean": -5.568802953348495e-05, "std": 0.04238880053162575, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.21.2.to_out.0.bias": { "min": -0.0554049089550972, "max": 0.06314343214035034, "mean": 0.00036526317126117647, "std": 0.01868700049817562, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.21.3.g": { "min": 0.35041460394859314, "max": 1.054603099822998, "mean": 0.7895448207855225, "std": 0.04915067180991173, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.21.4.ff.0.0.weight": { "min": -0.33399659395217896, "max": 0.3868362009525299, "mean": -0.00016958778724074364, "std": 0.04147977754473686, "sparsity": 0.0, "shape": [ 4096, 1024 ] }, "transformer.layers.21.4.ff.0.0.bias": { "min": -0.15840038657188416, "max": 0.059087082743644714, "mean": -0.03186880797147751, "std": 0.02521045319736004, "sparsity": 0.0, "shape": [ 4096 ] }, "transformer.layers.21.4.ff.2.weight": { "min": -0.6981510519981384, "max": 0.47227516770362854, "mean": -8.876612992025912e-05, "std": 0.05179238319396973, "sparsity": 0.0, "shape": [ 1024, 4096 ] }, "transformer.layers.21.4.ff.2.bias": { "min": -0.2498706579208374, "max": 0.33086034655570984, "mean": -0.0002500821719877422, "std": 0.04153008759021759, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.22.0.weight": { "min": -0.2874675989151001, "max": 0.3506753444671631, "mean": -2.142998255294515e-06, "std": 0.024235961958765984, "sparsity": 0.0, "shape": [ 1024, 2048 ] }, "transformer.layers.22.1.g": { "min": 0.19644968211650848, "max": 0.7875264883041382, "mean": 0.6702861189842224, "std": 0.058757346123456955, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.22.2.to_q.weight": { "min": -0.2307407557964325, "max": 0.23255716264247894, "mean": -1.9847611838486046e-05, "std": 0.04043736308813095, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.22.2.to_q.bias": { "min": -0.22115467488765717, "max": 0.24231739342212677, "mean": 0.0007812330732122064, "std": 0.05595459043979645, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.22.2.to_k.weight": { "min": -0.21687255799770355, "max": 0.22770829498767853, "mean": -7.165952411014587e-05, "std": 0.03937350586056709, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.22.2.to_k.bias": { "min": -8.959362030029297, "max": 9.123239517211914, "mean": -0.0011855876073241234, "std": 1.8560608625411987, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.22.2.to_v.weight": { "min": -0.2711891233921051, "max": 0.2605840563774109, "mean": 4.364762571640313e-05, "std": 0.038405757397413254, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.22.2.to_v.bias": { "min": -0.05802099406719208, "max": 0.05812212452292442, "mean": 0.0003513882402330637, "std": 0.014736738055944443, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.22.2.to_out.0.weight": { "min": -0.26627787947654724, "max": 0.28912854194641113, "mean": -6.142335041658953e-05, "std": 0.03907188028097153, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.22.2.to_out.0.bias": { "min": -0.04412651062011719, "max": 0.03752894699573517, "mean": -9.05310153029859e-05, "std": 0.013374187983572483, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.22.3.g": { "min": 0.339313268661499, "max": 1.1022799015045166, "mean": 0.8638956546783447, "std": 0.06418420374393463, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.22.4.ff.0.0.weight": { "min": -0.42381733655929565, "max": 0.41949865221977234, "mean": 0.0003125929506495595, "std": 0.04350028932094574, "sparsity": 0.0, "shape": [ 4096, 1024 ] }, "transformer.layers.22.4.ff.0.0.bias": { "min": -0.2159820944070816, "max": 0.1717892736196518, "mean": -0.02952037751674652, "std": 0.0320223867893219, "sparsity": 0.0, "shape": [ 4096 ] }, "transformer.layers.22.4.ff.2.weight": { "min": -0.6032647490501404, "max": 0.5633653998374939, "mean": -0.00015064005856402218, "std": 0.053445085883140564, "sparsity": 0.0, "shape": [ 1024, 4096 ] }, "transformer.layers.22.4.ff.2.bias": { "min": -0.17956292629241943, "max": 0.37900540232658386, "mean": 0.0013650960754603148, "std": 0.03737950697541237, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.23.0.weight": { "min": -0.3949747383594513, "max": 0.36959531903266907, "mean": 3.693038524943404e-05, "std": 0.028617311269044876, "sparsity": 0.0, "shape": [ 1024, 2048 ] }, "transformer.layers.23.1.g": { "min": 0.2902548313140869, "max": 0.835411548614502, "mean": 0.7055742740631104, "std": 0.06795050203800201, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.23.2.to_q.weight": { "min": -0.9264549016952515, "max": 1.0266518592834473, "mean": -2.6062916731461883e-05, "std": 0.047624703496694565, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.23.2.to_q.bias": { "min": -0.8848392963409424, "max": 0.8210154175758362, "mean": -0.00031388079514726996, "std": 0.09599340707063675, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.23.2.to_k.weight": { "min": -0.2704119086265564, "max": 0.24200940132141113, "mean": -2.2776041078031994e-05, "std": 0.03895159065723419, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.23.2.to_k.bias": { "min": -23.909391403198242, "max": 23.011491775512695, "mean": -0.09215216338634491, "std": 4.095620155334473, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.23.2.to_v.weight": { "min": -0.2288811355829239, "max": 0.24590590596199036, "mean": -2.564151509432122e-05, "std": 0.03863710165023804, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.23.2.to_v.bias": { "min": -0.060657572001218796, "max": 0.04613931104540825, "mean": -0.00014338521577883512, "std": 0.014703062362968922, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.23.2.to_out.0.weight": { "min": -0.33906009793281555, "max": 0.37649407982826233, "mean": 7.5478201324585825e-06, "std": 0.04081288352608681, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.23.2.to_out.0.bias": { "min": -0.04671977460384369, "max": 0.19674423336982727, "mean": 0.0002734751324169338, "std": 0.013588963076472282, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.23.3.g": { "min": 0.3744518756866455, "max": 1.1423423290252686, "mean": 0.890155553817749, "std": 0.0642639547586441, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.23.4.ff.0.0.weight": { "min": -0.44847023487091064, "max": 0.5443573594093323, "mean": 2.4567927539465018e-05, "std": 0.04556553065776825, "sparsity": 0.0, "shape": [ 4096, 1024 ] }, "transformer.layers.23.4.ff.0.0.bias": { "min": -0.2254226952791214, "max": 0.08823559433221817, "mean": -0.0320654921233654, "std": 0.03788232430815697, "sparsity": 0.0, "shape": [ 4096 ] }, "transformer.layers.23.4.ff.2.weight": { "min": -0.7300624251365662, "max": 0.6936558485031128, "mean": 3.439782449277118e-05, "std": 0.05177776888012886, "sparsity": 0.0, "shape": [ 1024, 4096 ] }, "transformer.layers.23.4.ff.2.bias": { "min": -0.1755923330783844, "max": 0.21977680921554565, "mean": 4.2144907638430595e-05, "std": 0.03183648735284805, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.24.0.weight": { "min": -0.3417545258998871, "max": 0.3754495084285736, "mean": 4.2937641410389915e-05, "std": 0.03413964807987213, "sparsity": 0.0, "shape": [ 1024, 2048 ] }, "transformer.layers.24.1.g": { "min": 0.3177294135093689, "max": 1.2977259159088135, "mean": 0.6017159223556519, "std": 0.08427947759628296, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.24.2.to_q.weight": { "min": -0.2838163673877716, "max": 0.2612304091453552, "mean": -2.8361523618514184e-06, "std": 0.03598065674304962, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.24.2.to_q.bias": { "min": -0.23691536486148834, "max": 0.20665380358695984, "mean": 0.0002377421478740871, "std": 0.05610164627432823, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.24.2.to_k.weight": { "min": -0.4367288649082184, "max": 0.326652467250824, "mean": 2.422912439214997e-05, "std": 0.034131284803152084, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.24.2.to_k.bias": { "min": -5.582788944244385, "max": 7.362354278564453, "mean": -0.007508529350161552, "std": 0.7035665512084961, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.24.2.to_v.weight": { "min": -0.34583720564842224, "max": 0.3661332130432129, "mean": 0.00010320795263396576, "std": 0.04782785847783089, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.24.2.to_v.bias": { "min": -0.07427486777305603, "max": 0.060801248997449875, "mean": 0.0009337762021459639, "std": 0.014963135123252869, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.24.2.to_out.0.weight": { "min": -0.25689101219177246, "max": 0.28821247816085815, "mean": 4.153083864366636e-06, "std": 0.04155467450618744, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.24.2.to_out.0.bias": { "min": -0.05564720183610916, "max": 0.0631924495100975, "mean": 0.0001379186287522316, "std": 0.007182796951383352, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.24.3.g": { "min": 0.49357107281684875, "max": 1.2338876724243164, "mean": 1.0134950876235962, "std": 0.11754289269447327, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.24.4.ff.0.0.weight": { "min": -1.0940601825714111, "max": 1.0474328994750977, "mean": -4.88213227072265e-05, "std": 0.05240841209888458, "sparsity": 0.0, "shape": [ 4096, 1024 ] }, "transformer.layers.24.4.ff.0.0.bias": { "min": -0.2248232364654541, "max": 0.17388059198856354, "mean": -0.02729785442352295, "std": 0.036497559398412704, "sparsity": 0.0, "shape": [ 4096 ] }, "transformer.layers.24.4.ff.2.weight": { "min": -0.8899852633476257, "max": 0.9281743168830872, "mean": -0.00014587071200367063, "std": 0.05328153818845749, "sparsity": 0.0, "shape": [ 1024, 4096 ] }, "transformer.layers.24.4.ff.2.bias": { "min": -0.17224453389644623, "max": 0.38245582580566406, "mean": 0.0033820997923612595, "std": 0.04001828283071518, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.25.0.weight": { "min": -0.7799473404884338, "max": 0.7260819673538208, "mean": 1.8725522750173695e-05, "std": 0.046160738915205, "sparsity": 0.0, "shape": [ 1024, 2048 ] }, "transformer.layers.25.1.g": { "min": 0.33860552310943604, "max": 1.442690134048462, "mean": 0.9484557509422302, "std": 0.20696218311786652, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.25.2.to_q.weight": { "min": -1.7459073066711426, "max": 1.704575538635254, "mean": 0.00022730980708729476, "std": 0.15868498384952545, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.25.2.to_q.bias": { "min": -1.2076622247695923, "max": 1.1073572635650635, "mean": -0.00959145836532116, "std": 0.20509476959705353, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.25.2.to_k.weight": { "min": -0.4218980371952057, "max": 0.4278029203414917, "mean": 6.46372718620114e-05, "std": 0.048015668988227844, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.25.2.to_k.bias": { "min": -19.878219604492188, "max": 19.671934127807617, "mean": -0.24954606592655182, "std": 4.8062262535095215, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.25.2.to_v.weight": { "min": -0.3252118229866028, "max": 0.44012102484703064, "mean": -1.1724467185558751e-05, "std": 0.04616120085120201, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.25.2.to_v.bias": { "min": -0.03427257761359215, "max": 0.03733307123184204, "mean": 0.0006422841688618064, "std": 0.012923721224069595, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.25.2.to_out.0.weight": { "min": -0.7051200270652771, "max": 0.6666434407234192, "mean": 4.353695476311259e-05, "std": 0.0578814335167408, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.25.2.to_out.0.bias": { "min": -0.07273512333631516, "max": 0.06799687445163727, "mean": -0.0001354652486043051, "std": 0.012961134314537048, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.25.3.g": { "min": 0.3802323043346405, "max": 1.392055869102478, "mean": 1.0665756464004517, "std": 0.2197023183107376, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.25.4.ff.0.0.weight": { "min": -0.6175218224525452, "max": 0.7191157341003418, "mean": 0.00011173778329975903, "std": 0.058020252734422684, "sparsity": 0.0, "shape": [ 4096, 1024 ] }, "transformer.layers.25.4.ff.0.0.bias": { "min": -0.22093473374843597, "max": 0.22644445300102234, "mean": 0.006260717287659645, "std": 0.04986373335123062, "sparsity": 0.0, "shape": [ 4096 ] }, "transformer.layers.25.4.ff.2.weight": { "min": -0.6302544474601746, "max": 0.8900287747383118, "mean": 1.1643458492471837e-05, "std": 0.023527663201093674, "sparsity": 0.0, "shape": [ 1024, 4096 ] }, "transformer.layers.25.4.ff.2.bias": { "min": -0.5102453231811523, "max": 0.4771297872066498, "mean": -0.0030403323471546173, "std": 0.06969437003135681, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.norm_out.g": { "min": 0.5377517342567444, "max": 1.1850762367248535, "mean": 0.7829766273498535, "std": 0.09934176504611969, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.proj_out.weight": { "min": -0.26876378059387207, "max": 0.21405881643295288, "mean": -0.00022433605045080185, "std": 0.053995925933122635, "sparsity": 0.0, "shape": [ 100, 1024 ] }, "transformer.proj_out.bias": { "min": -0.23968708515167236, "max": 0.014838683418929577, "mean": -0.0440097339451313, "std": 0.03449948504567146, "sparsity": 0.0, "shape": [ 100 ] } } }