{ "layer_types": { "transformer": 391 }, "parameter_counts": { "transformer.time_embed.time_mlp.0.weight": 262144, "transformer.time_embed.time_mlp.0.bias": 1024, "transformer.time_embed.time_mlp.2.weight": 1048576, "transformer.time_embed.time_mlp.2.bias": 1024, "transformer.text_embed.text_embed.weight": 254600, "transformer.input_embed.proj.weight": 307200, "transformer.input_embed.proj.bias": 1024, "transformer.input_embed.conv_pos_embed.conv1d.0.weight": 2031616, "transformer.input_embed.conv_pos_embed.conv1d.0.bias": 1024, "transformer.input_embed.conv_pos_embed.conv1d.2.weight": 2031616, "transformer.input_embed.conv_pos_embed.conv1d.2.bias": 1024, "transformer.layers.0.1.g": 1024, "transformer.layers.0.2.to_q.weight": 1048576, "transformer.layers.0.2.to_q.bias": 1024, "transformer.layers.0.2.to_k.weight": 1048576, "transformer.layers.0.2.to_k.bias": 1024, "transformer.layers.0.2.to_v.weight": 1048576, "transformer.layers.0.2.to_v.bias": 1024, "transformer.layers.0.2.to_out.0.weight": 1048576, "transformer.layers.0.2.to_out.0.bias": 1024, "transformer.layers.0.3.g": 1024, "transformer.layers.0.4.ff.0.0.weight": 4194304, "transformer.layers.0.4.ff.0.0.bias": 4096, "transformer.layers.0.4.ff.2.weight": 4194304, "transformer.layers.0.4.ff.2.bias": 1024, "transformer.layers.1.1.g": 1024, "transformer.layers.1.2.to_q.weight": 1048576, "transformer.layers.1.2.to_q.bias": 1024, "transformer.layers.1.2.to_k.weight": 1048576, "transformer.layers.1.2.to_k.bias": 1024, "transformer.layers.1.2.to_v.weight": 1048576, "transformer.layers.1.2.to_v.bias": 1024, "transformer.layers.1.2.to_out.0.weight": 1048576, "transformer.layers.1.2.to_out.0.bias": 1024, "transformer.layers.1.3.g": 1024, "transformer.layers.1.4.ff.0.0.weight": 4194304, "transformer.layers.1.4.ff.0.0.bias": 4096, "transformer.layers.1.4.ff.2.weight": 4194304, "transformer.layers.1.4.ff.2.bias": 1024, "transformer.layers.2.1.g": 1024, "transformer.layers.2.2.to_q.weight": 1048576, "transformer.layers.2.2.to_q.bias": 1024, "transformer.layers.2.2.to_k.weight": 1048576, "transformer.layers.2.2.to_k.bias": 1024, "transformer.layers.2.2.to_v.weight": 1048576, "transformer.layers.2.2.to_v.bias": 1024, "transformer.layers.2.2.to_out.0.weight": 1048576, "transformer.layers.2.2.to_out.0.bias": 1024, "transformer.layers.2.3.g": 1024, "transformer.layers.2.4.ff.0.0.weight": 4194304, "transformer.layers.2.4.ff.0.0.bias": 4096, "transformer.layers.2.4.ff.2.weight": 4194304, "transformer.layers.2.4.ff.2.bias": 1024, "transformer.layers.3.1.g": 1024, "transformer.layers.3.2.to_q.weight": 1048576, "transformer.layers.3.2.to_q.bias": 1024, "transformer.layers.3.2.to_k.weight": 1048576, "transformer.layers.3.2.to_k.bias": 1024, "transformer.layers.3.2.to_v.weight": 1048576, "transformer.layers.3.2.to_v.bias": 1024, "transformer.layers.3.2.to_out.0.weight": 1048576, "transformer.layers.3.2.to_out.0.bias": 1024, "transformer.layers.3.3.g": 1024, "transformer.layers.3.4.ff.0.0.weight": 4194304, "transformer.layers.3.4.ff.0.0.bias": 4096, "transformer.layers.3.4.ff.2.weight": 4194304, "transformer.layers.3.4.ff.2.bias": 1024, "transformer.layers.4.1.g": 1024, "transformer.layers.4.2.to_q.weight": 1048576, "transformer.layers.4.2.to_q.bias": 1024, "transformer.layers.4.2.to_k.weight": 1048576, "transformer.layers.4.2.to_k.bias": 1024, "transformer.layers.4.2.to_v.weight": 1048576, "transformer.layers.4.2.to_v.bias": 1024, "transformer.layers.4.2.to_out.0.weight": 1048576, "transformer.layers.4.2.to_out.0.bias": 1024, "transformer.layers.4.3.g": 1024, "transformer.layers.4.4.ff.0.0.weight": 4194304, "transformer.layers.4.4.ff.0.0.bias": 4096, "transformer.layers.4.4.ff.2.weight": 4194304, "transformer.layers.4.4.ff.2.bias": 1024, "transformer.layers.5.1.g": 1024, "transformer.layers.5.2.to_q.weight": 1048576, "transformer.layers.5.2.to_q.bias": 1024, "transformer.layers.5.2.to_k.weight": 1048576, "transformer.layers.5.2.to_k.bias": 1024, "transformer.layers.5.2.to_v.weight": 1048576, "transformer.layers.5.2.to_v.bias": 1024, "transformer.layers.5.2.to_out.0.weight": 1048576, "transformer.layers.5.2.to_out.0.bias": 1024, "transformer.layers.5.3.g": 1024, "transformer.layers.5.4.ff.0.0.weight": 4194304, "transformer.layers.5.4.ff.0.0.bias": 4096, "transformer.layers.5.4.ff.2.weight": 4194304, "transformer.layers.5.4.ff.2.bias": 1024, "transformer.layers.6.1.g": 1024, "transformer.layers.6.2.to_q.weight": 1048576, "transformer.layers.6.2.to_q.bias": 1024, "transformer.layers.6.2.to_k.weight": 1048576, "transformer.layers.6.2.to_k.bias": 1024, "transformer.layers.6.2.to_v.weight": 1048576, "transformer.layers.6.2.to_v.bias": 1024, "transformer.layers.6.2.to_out.0.weight": 1048576, "transformer.layers.6.2.to_out.0.bias": 1024, "transformer.layers.6.3.g": 1024, "transformer.layers.6.4.ff.0.0.weight": 4194304, "transformer.layers.6.4.ff.0.0.bias": 4096, "transformer.layers.6.4.ff.2.weight": 4194304, "transformer.layers.6.4.ff.2.bias": 1024, "transformer.layers.7.1.g": 1024, "transformer.layers.7.2.to_q.weight": 1048576, "transformer.layers.7.2.to_q.bias": 1024, "transformer.layers.7.2.to_k.weight": 1048576, "transformer.layers.7.2.to_k.bias": 1024, "transformer.layers.7.2.to_v.weight": 1048576, "transformer.layers.7.2.to_v.bias": 1024, "transformer.layers.7.2.to_out.0.weight": 1048576, "transformer.layers.7.2.to_out.0.bias": 1024, "transformer.layers.7.3.g": 1024, "transformer.layers.7.4.ff.0.0.weight": 4194304, "transformer.layers.7.4.ff.0.0.bias": 4096, "transformer.layers.7.4.ff.2.weight": 4194304, "transformer.layers.7.4.ff.2.bias": 1024, "transformer.layers.8.1.g": 1024, "transformer.layers.8.2.to_q.weight": 1048576, "transformer.layers.8.2.to_q.bias": 1024, "transformer.layers.8.2.to_k.weight": 1048576, "transformer.layers.8.2.to_k.bias": 1024, "transformer.layers.8.2.to_v.weight": 1048576, "transformer.layers.8.2.to_v.bias": 1024, "transformer.layers.8.2.to_out.0.weight": 1048576, "transformer.layers.8.2.to_out.0.bias": 1024, "transformer.layers.8.3.g": 1024, "transformer.layers.8.4.ff.0.0.weight": 4194304, "transformer.layers.8.4.ff.0.0.bias": 4096, "transformer.layers.8.4.ff.2.weight": 4194304, "transformer.layers.8.4.ff.2.bias": 1024, "transformer.layers.9.1.g": 1024, "transformer.layers.9.2.to_q.weight": 1048576, "transformer.layers.9.2.to_q.bias": 1024, "transformer.layers.9.2.to_k.weight": 1048576, "transformer.layers.9.2.to_k.bias": 1024, "transformer.layers.9.2.to_v.weight": 1048576, "transformer.layers.9.2.to_v.bias": 1024, "transformer.layers.9.2.to_out.0.weight": 1048576, "transformer.layers.9.2.to_out.0.bias": 1024, "transformer.layers.9.3.g": 1024, "transformer.layers.9.4.ff.0.0.weight": 4194304, "transformer.layers.9.4.ff.0.0.bias": 4096, "transformer.layers.9.4.ff.2.weight": 4194304, "transformer.layers.9.4.ff.2.bias": 1024, "transformer.layers.10.1.g": 1024, "transformer.layers.10.2.to_q.weight": 1048576, "transformer.layers.10.2.to_q.bias": 1024, "transformer.layers.10.2.to_k.weight": 1048576, "transformer.layers.10.2.to_k.bias": 1024, "transformer.layers.10.2.to_v.weight": 1048576, "transformer.layers.10.2.to_v.bias": 1024, "transformer.layers.10.2.to_out.0.weight": 1048576, "transformer.layers.10.2.to_out.0.bias": 1024, "transformer.layers.10.3.g": 1024, "transformer.layers.10.4.ff.0.0.weight": 4194304, "transformer.layers.10.4.ff.0.0.bias": 4096, "transformer.layers.10.4.ff.2.weight": 4194304, "transformer.layers.10.4.ff.2.bias": 1024, "transformer.layers.11.1.g": 1024, "transformer.layers.11.2.to_q.weight": 1048576, "transformer.layers.11.2.to_q.bias": 1024, "transformer.layers.11.2.to_k.weight": 1048576, "transformer.layers.11.2.to_k.bias": 1024, "transformer.layers.11.2.to_v.weight": 1048576, "transformer.layers.11.2.to_v.bias": 1024, "transformer.layers.11.2.to_out.0.weight": 1048576, "transformer.layers.11.2.to_out.0.bias": 1024, "transformer.layers.11.3.g": 1024, "transformer.layers.11.4.ff.0.0.weight": 4194304, "transformer.layers.11.4.ff.0.0.bias": 4096, "transformer.layers.11.4.ff.2.weight": 4194304, "transformer.layers.11.4.ff.2.bias": 1024, "transformer.layers.12.1.g": 1024, "transformer.layers.12.2.to_q.weight": 1048576, "transformer.layers.12.2.to_q.bias": 1024, "transformer.layers.12.2.to_k.weight": 1048576, "transformer.layers.12.2.to_k.bias": 1024, "transformer.layers.12.2.to_v.weight": 1048576, "transformer.layers.12.2.to_v.bias": 1024, "transformer.layers.12.2.to_out.0.weight": 1048576, "transformer.layers.12.2.to_out.0.bias": 1024, "transformer.layers.12.3.g": 1024, "transformer.layers.12.4.ff.0.0.weight": 4194304, "transformer.layers.12.4.ff.0.0.bias": 4096, "transformer.layers.12.4.ff.2.weight": 4194304, "transformer.layers.12.4.ff.2.bias": 1024, "transformer.layers.13.0.weight": 2097152, "transformer.layers.13.1.g": 1024, "transformer.layers.13.2.to_q.weight": 1048576, "transformer.layers.13.2.to_q.bias": 1024, "transformer.layers.13.2.to_k.weight": 1048576, "transformer.layers.13.2.to_k.bias": 1024, "transformer.layers.13.2.to_v.weight": 1048576, "transformer.layers.13.2.to_v.bias": 1024, "transformer.layers.13.2.to_out.0.weight": 1048576, "transformer.layers.13.2.to_out.0.bias": 1024, "transformer.layers.13.3.g": 1024, "transformer.layers.13.4.ff.0.0.weight": 4194304, "transformer.layers.13.4.ff.0.0.bias": 4096, "transformer.layers.13.4.ff.2.weight": 4194304, "transformer.layers.13.4.ff.2.bias": 1024, "transformer.layers.14.0.weight": 2097152, "transformer.layers.14.1.g": 1024, "transformer.layers.14.2.to_q.weight": 1048576, "transformer.layers.14.2.to_q.bias": 1024, "transformer.layers.14.2.to_k.weight": 1048576, "transformer.layers.14.2.to_k.bias": 1024, "transformer.layers.14.2.to_v.weight": 1048576, "transformer.layers.14.2.to_v.bias": 1024, "transformer.layers.14.2.to_out.0.weight": 1048576, "transformer.layers.14.2.to_out.0.bias": 1024, "transformer.layers.14.3.g": 1024, "transformer.layers.14.4.ff.0.0.weight": 4194304, "transformer.layers.14.4.ff.0.0.bias": 4096, "transformer.layers.14.4.ff.2.weight": 4194304, "transformer.layers.14.4.ff.2.bias": 1024, "transformer.layers.15.0.weight": 2097152, "transformer.layers.15.1.g": 1024, "transformer.layers.15.2.to_q.weight": 1048576, "transformer.layers.15.2.to_q.bias": 1024, "transformer.layers.15.2.to_k.weight": 1048576, "transformer.layers.15.2.to_k.bias": 1024, "transformer.layers.15.2.to_v.weight": 1048576, "transformer.layers.15.2.to_v.bias": 1024, "transformer.layers.15.2.to_out.0.weight": 1048576, "transformer.layers.15.2.to_out.0.bias": 1024, "transformer.layers.15.3.g": 1024, "transformer.layers.15.4.ff.0.0.weight": 4194304, "transformer.layers.15.4.ff.0.0.bias": 4096, "transformer.layers.15.4.ff.2.weight": 4194304, "transformer.layers.15.4.ff.2.bias": 1024, "transformer.layers.16.0.weight": 2097152, "transformer.layers.16.1.g": 1024, "transformer.layers.16.2.to_q.weight": 1048576, "transformer.layers.16.2.to_q.bias": 1024, "transformer.layers.16.2.to_k.weight": 1048576, "transformer.layers.16.2.to_k.bias": 1024, "transformer.layers.16.2.to_v.weight": 1048576, "transformer.layers.16.2.to_v.bias": 1024, "transformer.layers.16.2.to_out.0.weight": 1048576, "transformer.layers.16.2.to_out.0.bias": 1024, "transformer.layers.16.3.g": 1024, "transformer.layers.16.4.ff.0.0.weight": 4194304, "transformer.layers.16.4.ff.0.0.bias": 4096, "transformer.layers.16.4.ff.2.weight": 4194304, "transformer.layers.16.4.ff.2.bias": 1024, "transformer.layers.17.0.weight": 2097152, "transformer.layers.17.1.g": 1024, "transformer.layers.17.2.to_q.weight": 1048576, "transformer.layers.17.2.to_q.bias": 1024, "transformer.layers.17.2.to_k.weight": 1048576, "transformer.layers.17.2.to_k.bias": 1024, "transformer.layers.17.2.to_v.weight": 1048576, "transformer.layers.17.2.to_v.bias": 1024, "transformer.layers.17.2.to_out.0.weight": 1048576, "transformer.layers.17.2.to_out.0.bias": 1024, "transformer.layers.17.3.g": 1024, "transformer.layers.17.4.ff.0.0.weight": 4194304, "transformer.layers.17.4.ff.0.0.bias": 4096, "transformer.layers.17.4.ff.2.weight": 4194304, "transformer.layers.17.4.ff.2.bias": 1024, "transformer.layers.18.0.weight": 2097152, "transformer.layers.18.1.g": 1024, "transformer.layers.18.2.to_q.weight": 1048576, "transformer.layers.18.2.to_q.bias": 1024, "transformer.layers.18.2.to_k.weight": 1048576, "transformer.layers.18.2.to_k.bias": 1024, "transformer.layers.18.2.to_v.weight": 1048576, "transformer.layers.18.2.to_v.bias": 1024, "transformer.layers.18.2.to_out.0.weight": 1048576, "transformer.layers.18.2.to_out.0.bias": 1024, "transformer.layers.18.3.g": 1024, "transformer.layers.18.4.ff.0.0.weight": 4194304, "transformer.layers.18.4.ff.0.0.bias": 4096, "transformer.layers.18.4.ff.2.weight": 4194304, "transformer.layers.18.4.ff.2.bias": 1024, "transformer.layers.19.0.weight": 2097152, "transformer.layers.19.1.g": 1024, "transformer.layers.19.2.to_q.weight": 1048576, "transformer.layers.19.2.to_q.bias": 1024, "transformer.layers.19.2.to_k.weight": 1048576, "transformer.layers.19.2.to_k.bias": 1024, "transformer.layers.19.2.to_v.weight": 1048576, "transformer.layers.19.2.to_v.bias": 1024, "transformer.layers.19.2.to_out.0.weight": 1048576, "transformer.layers.19.2.to_out.0.bias": 1024, "transformer.layers.19.3.g": 1024, "transformer.layers.19.4.ff.0.0.weight": 4194304, "transformer.layers.19.4.ff.0.0.bias": 4096, "transformer.layers.19.4.ff.2.weight": 4194304, "transformer.layers.19.4.ff.2.bias": 1024, "transformer.layers.20.0.weight": 2097152, "transformer.layers.20.1.g": 1024, "transformer.layers.20.2.to_q.weight": 1048576, "transformer.layers.20.2.to_q.bias": 1024, "transformer.layers.20.2.to_k.weight": 1048576, "transformer.layers.20.2.to_k.bias": 1024, "transformer.layers.20.2.to_v.weight": 1048576, "transformer.layers.20.2.to_v.bias": 1024, "transformer.layers.20.2.to_out.0.weight": 1048576, "transformer.layers.20.2.to_out.0.bias": 1024, "transformer.layers.20.3.g": 1024, "transformer.layers.20.4.ff.0.0.weight": 4194304, "transformer.layers.20.4.ff.0.0.bias": 4096, "transformer.layers.20.4.ff.2.weight": 4194304, "transformer.layers.20.4.ff.2.bias": 1024, "transformer.layers.21.0.weight": 2097152, "transformer.layers.21.1.g": 1024, "transformer.layers.21.2.to_q.weight": 1048576, "transformer.layers.21.2.to_q.bias": 1024, "transformer.layers.21.2.to_k.weight": 1048576, "transformer.layers.21.2.to_k.bias": 1024, "transformer.layers.21.2.to_v.weight": 1048576, "transformer.layers.21.2.to_v.bias": 1024, "transformer.layers.21.2.to_out.0.weight": 1048576, "transformer.layers.21.2.to_out.0.bias": 1024, "transformer.layers.21.3.g": 1024, "transformer.layers.21.4.ff.0.0.weight": 4194304, "transformer.layers.21.4.ff.0.0.bias": 4096, "transformer.layers.21.4.ff.2.weight": 4194304, "transformer.layers.21.4.ff.2.bias": 1024, "transformer.layers.22.0.weight": 2097152, "transformer.layers.22.1.g": 1024, "transformer.layers.22.2.to_q.weight": 1048576, "transformer.layers.22.2.to_q.bias": 1024, "transformer.layers.22.2.to_k.weight": 1048576, "transformer.layers.22.2.to_k.bias": 1024, "transformer.layers.22.2.to_v.weight": 1048576, "transformer.layers.22.2.to_v.bias": 1024, "transformer.layers.22.2.to_out.0.weight": 1048576, "transformer.layers.22.2.to_out.0.bias": 1024, "transformer.layers.22.3.g": 1024, "transformer.layers.22.4.ff.0.0.weight": 4194304, "transformer.layers.22.4.ff.0.0.bias": 4096, "transformer.layers.22.4.ff.2.weight": 4194304, "transformer.layers.22.4.ff.2.bias": 1024, "transformer.layers.23.0.weight": 2097152, "transformer.layers.23.1.g": 1024, "transformer.layers.23.2.to_q.weight": 1048576, "transformer.layers.23.2.to_q.bias": 1024, "transformer.layers.23.2.to_k.weight": 1048576, "transformer.layers.23.2.to_k.bias": 1024, "transformer.layers.23.2.to_v.weight": 1048576, "transformer.layers.23.2.to_v.bias": 1024, "transformer.layers.23.2.to_out.0.weight": 1048576, "transformer.layers.23.2.to_out.0.bias": 1024, "transformer.layers.23.3.g": 1024, "transformer.layers.23.4.ff.0.0.weight": 4194304, "transformer.layers.23.4.ff.0.0.bias": 4096, "transformer.layers.23.4.ff.2.weight": 4194304, "transformer.layers.23.4.ff.2.bias": 1024, "transformer.layers.24.0.weight": 2097152, "transformer.layers.24.1.g": 1024, "transformer.layers.24.2.to_q.weight": 1048576, "transformer.layers.24.2.to_q.bias": 1024, "transformer.layers.24.2.to_k.weight": 1048576, "transformer.layers.24.2.to_k.bias": 1024, "transformer.layers.24.2.to_v.weight": 1048576, "transformer.layers.24.2.to_v.bias": 1024, "transformer.layers.24.2.to_out.0.weight": 1048576, "transformer.layers.24.2.to_out.0.bias": 1024, "transformer.layers.24.3.g": 1024, "transformer.layers.24.4.ff.0.0.weight": 4194304, "transformer.layers.24.4.ff.0.0.bias": 4096, "transformer.layers.24.4.ff.2.weight": 4194304, "transformer.layers.24.4.ff.2.bias": 1024, "transformer.layers.25.0.weight": 2097152, "transformer.layers.25.1.g": 1024, "transformer.layers.25.2.to_q.weight": 1048576, "transformer.layers.25.2.to_q.bias": 1024, "transformer.layers.25.2.to_k.weight": 1048576, "transformer.layers.25.2.to_k.bias": 1024, "transformer.layers.25.2.to_v.weight": 1048576, "transformer.layers.25.2.to_v.bias": 1024, "transformer.layers.25.2.to_out.0.weight": 1048576, "transformer.layers.25.2.to_out.0.bias": 1024, "transformer.layers.25.3.g": 1024, "transformer.layers.25.4.ff.0.0.weight": 4194304, "transformer.layers.25.4.ff.0.0.bias": 4096, "transformer.layers.25.4.ff.2.weight": 4194304, "transformer.layers.25.4.ff.2.bias": 1024, "transformer.norm_out.g": 1024, "transformer.proj_out.weight": 102400, "transformer.proj_out.bias": 100 }, "important_layers": [ "transformer.time_embed.time_mlp.0.weight", "transformer.time_embed.time_mlp.2.weight", "transformer.text_embed.text_embed.weight", "transformer.input_embed.proj.weight", "transformer.input_embed.conv_pos_embed.conv1d.0.weight", "transformer.input_embed.conv_pos_embed.conv1d.2.weight", "transformer.layers.0.2.to_q.weight", "transformer.layers.0.2.to_k.weight", "transformer.layers.0.2.to_v.weight", "transformer.layers.0.2.to_out.0.weight", "transformer.layers.0.4.ff.0.0.weight", "transformer.layers.0.4.ff.2.weight", "transformer.layers.1.2.to_q.weight", "transformer.layers.1.2.to_k.weight", "transformer.layers.1.2.to_v.weight", "transformer.layers.1.2.to_out.0.weight", "transformer.layers.1.4.ff.0.0.weight", "transformer.layers.1.4.ff.2.weight", "transformer.layers.2.2.to_q.weight", "transformer.layers.2.2.to_k.weight", "transformer.layers.2.2.to_v.weight", "transformer.layers.2.2.to_out.0.weight", "transformer.layers.2.4.ff.0.0.weight", "transformer.layers.2.4.ff.2.weight", "transformer.layers.3.2.to_q.weight", "transformer.layers.3.2.to_k.weight", "transformer.layers.3.2.to_v.weight", "transformer.layers.3.2.to_out.0.weight", "transformer.layers.3.4.ff.0.0.weight", "transformer.layers.3.4.ff.2.weight", "transformer.layers.4.2.to_q.weight", "transformer.layers.4.2.to_k.weight", "transformer.layers.4.2.to_v.weight", "transformer.layers.4.2.to_out.0.weight", "transformer.layers.4.4.ff.0.0.weight", "transformer.layers.4.4.ff.2.weight", "transformer.layers.5.2.to_q.weight", "transformer.layers.5.2.to_k.weight", "transformer.layers.5.2.to_v.weight", "transformer.layers.5.2.to_out.0.weight", "transformer.layers.5.4.ff.0.0.weight", "transformer.layers.5.4.ff.2.weight", "transformer.layers.6.2.to_q.weight", "transformer.layers.6.2.to_k.weight", "transformer.layers.6.2.to_v.weight", "transformer.layers.6.2.to_out.0.weight", "transformer.layers.6.4.ff.0.0.weight", "transformer.layers.6.4.ff.2.weight", "transformer.layers.7.2.to_q.weight", "transformer.layers.7.2.to_k.weight", "transformer.layers.7.2.to_v.weight", "transformer.layers.7.2.to_out.0.weight", "transformer.layers.7.4.ff.0.0.weight", "transformer.layers.7.4.ff.2.weight", "transformer.layers.8.4.ff.0.0.weight", "transformer.layers.8.4.ff.2.weight", "transformer.layers.9.4.ff.0.0.weight", "transformer.layers.9.4.ff.2.weight", "transformer.layers.10.4.ff.0.0.weight", "transformer.layers.10.4.ff.2.weight", "transformer.layers.11.4.ff.0.0.weight", "transformer.layers.11.4.ff.2.weight", "transformer.layers.12.4.ff.0.0.weight", "transformer.layers.12.4.ff.2.weight", "transformer.layers.13.0.weight", "transformer.layers.13.4.ff.0.0.weight", "transformer.layers.13.4.ff.2.weight", "transformer.layers.14.0.weight", "transformer.layers.14.4.ff.0.0.weight", "transformer.layers.14.4.ff.2.weight", "transformer.layers.15.0.weight", "transformer.layers.15.4.ff.0.0.weight", "transformer.layers.15.4.ff.2.weight", "transformer.layers.16.4.ff.0.0.weight", "transformer.layers.16.4.ff.2.weight", "transformer.layers.17.4.ff.0.0.weight", "transformer.layers.17.4.ff.2.weight", "transformer.layers.18.4.ff.0.0.weight", "transformer.layers.18.4.ff.2.weight", "transformer.layers.19.4.ff.0.0.weight", "transformer.layers.19.4.ff.2.weight", "transformer.layers.20.4.ff.0.0.weight", "transformer.layers.20.4.ff.2.weight", "transformer.layers.21.4.ff.0.0.weight", "transformer.layers.21.4.ff.2.weight", "transformer.layers.22.4.ff.0.0.weight", "transformer.layers.22.4.ff.2.weight", "transformer.layers.23.4.ff.0.0.weight", "transformer.layers.23.4.ff.2.weight", "transformer.layers.24.4.ff.0.0.weight", "transformer.layers.24.4.ff.2.weight", "transformer.layers.25.4.ff.0.0.weight", "transformer.layers.25.4.ff.2.weight" ], "bottleneck_layers": [], "recommendations": { "focus_layers": [ "transformer.time_embed.time_mlp.0.weight", "transformer.time_embed.time_mlp.2.weight", "transformer.text_embed.text_embed.weight", "transformer.input_embed.proj.weight", "transformer.input_embed.conv_pos_embed.conv1d.0.weight", "transformer.input_embed.conv_pos_embed.conv1d.2.weight", "transformer.layers.0.2.to_q.weight", "transformer.layers.0.2.to_k.weight", "transformer.layers.0.2.to_v.weight", "transformer.layers.0.2.to_out.0.weight", "transformer.layers.0.4.ff.0.0.weight", "transformer.layers.0.4.ff.2.weight", "transformer.layers.1.2.to_q.weight", "transformer.layers.1.2.to_k.weight", "transformer.layers.1.2.to_v.weight", "transformer.layers.1.2.to_out.0.weight", "transformer.layers.1.4.ff.0.0.weight", "transformer.layers.1.4.ff.2.weight", "transformer.layers.2.2.to_q.weight", "transformer.layers.2.2.to_k.weight", "transformer.layers.2.2.to_v.weight", "transformer.layers.2.2.to_out.0.weight", "transformer.layers.2.4.ff.0.0.weight", "transformer.layers.2.4.ff.2.weight", "transformer.layers.3.2.to_q.weight", "transformer.layers.3.2.to_k.weight", "transformer.layers.3.2.to_v.weight", "transformer.layers.3.2.to_out.0.weight", "transformer.layers.3.4.ff.0.0.weight", "transformer.layers.3.4.ff.2.weight", "transformer.layers.4.2.to_q.weight", "transformer.layers.4.2.to_k.weight", "transformer.layers.4.2.to_v.weight", "transformer.layers.4.2.to_out.0.weight", "transformer.layers.4.4.ff.0.0.weight", "transformer.layers.4.4.ff.2.weight", "transformer.layers.5.2.to_q.weight", "transformer.layers.5.2.to_k.weight", "transformer.layers.5.2.to_v.weight", "transformer.layers.5.2.to_out.0.weight", "transformer.layers.5.4.ff.0.0.weight", "transformer.layers.5.4.ff.2.weight", "transformer.layers.6.2.to_q.weight", "transformer.layers.6.2.to_k.weight", "transformer.layers.6.2.to_v.weight", "transformer.layers.6.2.to_out.0.weight", "transformer.layers.6.4.ff.0.0.weight", "transformer.layers.6.4.ff.2.weight", "transformer.layers.7.2.to_q.weight", "transformer.layers.7.2.to_k.weight", "transformer.layers.7.2.to_v.weight", "transformer.layers.7.2.to_out.0.weight", "transformer.layers.7.4.ff.0.0.weight", "transformer.layers.7.4.ff.2.weight", "transformer.layers.8.4.ff.0.0.weight", "transformer.layers.8.4.ff.2.weight", "transformer.layers.9.4.ff.0.0.weight", "transformer.layers.9.4.ff.2.weight", "transformer.layers.10.4.ff.0.0.weight", "transformer.layers.10.4.ff.2.weight", "transformer.layers.11.4.ff.0.0.weight", "transformer.layers.11.4.ff.2.weight", "transformer.layers.12.4.ff.0.0.weight", "transformer.layers.12.4.ff.2.weight", "transformer.layers.13.0.weight", "transformer.layers.13.4.ff.0.0.weight", "transformer.layers.13.4.ff.2.weight", "transformer.layers.14.0.weight", "transformer.layers.14.4.ff.0.0.weight", "transformer.layers.14.4.ff.2.weight", "transformer.layers.15.0.weight", "transformer.layers.15.4.ff.0.0.weight", "transformer.layers.15.4.ff.2.weight", "transformer.layers.16.4.ff.0.0.weight", "transformer.layers.16.4.ff.2.weight", "transformer.layers.17.4.ff.0.0.weight", "transformer.layers.17.4.ff.2.weight", "transformer.layers.18.4.ff.0.0.weight", "transformer.layers.18.4.ff.2.weight", "transformer.layers.19.4.ff.0.0.weight", "transformer.layers.19.4.ff.2.weight", "transformer.layers.20.4.ff.0.0.weight", "transformer.layers.20.4.ff.2.weight", "transformer.layers.21.4.ff.0.0.weight", "transformer.layers.21.4.ff.2.weight", "transformer.layers.22.4.ff.0.0.weight", "transformer.layers.22.4.ff.2.weight", "transformer.layers.23.4.ff.0.0.weight", "transformer.layers.23.4.ff.2.weight", "transformer.layers.24.4.ff.0.0.weight", "transformer.layers.24.4.ff.2.weight", "transformer.layers.25.4.ff.0.0.weight", "transformer.layers.25.4.ff.2.weight" ] }, "total_parameters": 391, "total_elements": 360755948, "param_ranges": { "transformer.time_embed.time_mlp.0.weight": { "min": -0.4302472174167633, "max": 0.2981015741825104, "mean": -0.0025541300419718027, "std": 0.04255979508161545, "sparsity": 0.0, "shape": [ 1024, 256 ] }, "transformer.time_embed.time_mlp.0.bias": { "min": -0.06310182064771652, "max": 0.10759169608354568, "mean": 0.0006188107072375715, "std": 0.03408230096101761, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.time_embed.time_mlp.2.weight": { "min": -0.4127681851387024, "max": 0.8368753790855408, "mean": -0.00020183739252388477, "std": 0.024111691862344742, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.time_embed.time_mlp.2.bias": { "min": -0.11528493463993073, "max": 0.32169410586357117, "mean": -0.0009411157225258648, "std": 0.019568322226405144, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.text_embed.text_embed.weight": { "min": -2.7921671867370605, "max": 2.8708858489990234, "mean": -0.00036475385422818363, "std": 0.6154695153236389, "sparsity": 0.0, "shape": [ 2546, 100 ] }, "transformer.input_embed.proj.weight": { "min": -0.279247909784317, "max": 0.3815617561340332, "mean": 0.0004244846059009433, "std": 0.04274849221110344, "sparsity": 0.0, "shape": [ 1024, 300 ] }, "transformer.input_embed.proj.bias": { "min": -0.22255778312683105, "max": 0.2097877562046051, "mean": -0.00448887562379241, "std": 0.040919456630945206, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.input_embed.conv_pos_embed.conv1d.0.weight": { "min": -0.42842426896095276, "max": 0.47603461146354675, "mean": 3.9225278669619e-06, "std": 0.024510197341442108, "sparsity": 0.0, "shape": [ 1024, 64, 31 ] }, "transformer.input_embed.conv_pos_embed.conv1d.0.bias": { "min": -0.3252594470977783, "max": 0.1568366438150406, "mean": -0.04670371487736702, "std": 0.05158696323633194, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.input_embed.conv_pos_embed.conv1d.2.weight": { "min": -0.41043803095817566, "max": 0.3547053635120392, "mean": -0.00013071295688860118, "std": 0.023602206259965897, "sparsity": 0.0, "shape": [ 1024, 64, 31 ] }, "transformer.input_embed.conv_pos_embed.conv1d.2.bias": { "min": -0.22980256378650665, "max": 0.26275309920310974, "mean": -0.02913004904985428, "std": 0.04934975132346153, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.0.1.g": { "min": 0.25458577275276184, "max": 0.8201687335968018, "mean": 0.5254767537117004, "std": 0.08081887662410736, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.0.2.to_q.weight": { "min": -0.2970397174358368, "max": 0.2657235562801361, "mean": -0.00042574311373755336, "std": 0.03210281580686569, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.0.2.to_q.bias": { "min": -0.09289710968732834, "max": 0.1248435452580452, "mean": 0.0006472540553659201, "std": 0.025739869102835655, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.0.2.to_k.weight": { "min": -0.2908317744731903, "max": 0.2814251184463501, "mean": -7.539847865700722e-05, "std": 0.030931154265999794, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.0.2.to_k.bias": { "min": -5.900259494781494, "max": 5.815035820007324, "mean": -0.009333062916994095, "std": 1.2956619262695312, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.0.2.to_v.weight": { "min": -0.42508748173713684, "max": 0.3436461091041565, "mean": 9.804315777728334e-05, "std": 0.029953401535749435, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.0.2.to_v.bias": { "min": -0.028917992487549782, "max": 0.027773840352892876, "mean": -0.00031790570938028395, "std": 0.012571282684803009, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.0.2.to_out.0.weight": { "min": -0.4539951980113983, "max": 0.44834843277931213, "mean": 2.359610516577959e-05, "std": 0.02385314740240574, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.0.2.to_out.0.bias": { "min": -0.08872788399457932, "max": 0.0911579355597496, "mean": 0.0022788788191974163, "std": 0.01951882243156433, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.0.3.g": { "min": 0.26684099435806274, "max": 1.056283712387085, "mean": 0.5311816930770874, "std": 0.10443845391273499, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.0.4.ff.0.0.weight": { "min": -0.5745589733123779, "max": 0.608278751373291, "mean": -0.0004312347446102649, "std": 0.03859887644648552, "sparsity": 0.0, "shape": [ 4096, 1024 ] }, "transformer.layers.0.4.ff.0.0.bias": { "min": -0.18254584074020386, "max": 0.04550725594162941, "mean": -0.02946603111922741, "std": 0.042608592659235, "sparsity": 0.0, "shape": [ 4096 ] }, "transformer.layers.0.4.ff.2.weight": { "min": -1.1672769784927368, "max": 1.6339865922927856, "mean": 0.0003258037322666496, "std": 0.027695847675204277, "sparsity": 0.0, "shape": [ 1024, 4096 ] }, "transformer.layers.0.4.ff.2.bias": { "min": -0.16238044202327728, "max": 0.205756276845932, "mean": -0.021133966743946075, "std": 0.02794249914586544, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.1.1.g": { "min": 0.2238895148038864, "max": 0.8438186645507812, "mean": 0.48762065172195435, "std": 0.07522151619195938, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.1.2.to_q.weight": { "min": -0.2554994523525238, "max": 0.30581825971603394, "mean": -6.700396625092253e-06, "std": 0.03347325325012207, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.1.2.to_q.bias": { "min": -0.09536930173635483, "max": 0.11054016649723053, "mean": 6.769842002540827e-05, "std": 0.026959657669067383, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.1.2.to_k.weight": { "min": -0.2971096336841583, "max": 0.2961491346359253, "mean": 5.292622518027201e-05, "std": 0.03254416957497597, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.1.2.to_k.bias": { "min": -5.165089130401611, "max": 5.085312843322754, "mean": -0.01459675282239914, "std": 1.1575658321380615, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.1.2.to_v.weight": { "min": -0.34498170018196106, "max": 0.3433385491371155, "mean": 7.90221311035566e-05, "std": 0.03006155788898468, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.1.2.to_v.bias": { "min": -0.03615141659975052, "max": 0.03325657546520233, "mean": -0.00014247104991227388, "std": 0.01303154043853283, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.1.2.to_out.0.weight": { "min": -0.3154723644256592, "max": 0.37497249245643616, "mean": -2.0466719433898106e-05, "std": 0.02405875362455845, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.1.2.to_out.0.bias": { "min": -0.10546914488077164, "max": 0.12202588468790054, "mean": -0.0019681837875396013, "std": 0.028853828087449074, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.1.3.g": { "min": 0.3114376366138458, "max": 1.12091863155365, "mean": 0.6662803292274475, "std": 0.09775093197822571, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.1.4.ff.0.0.weight": { "min": -0.8727247714996338, "max": 0.6275021433830261, "mean": 0.001675525214523077, "std": 0.047438088804483414, "sparsity": 0.0, "shape": [ 4096, 1024 ] }, "transformer.layers.1.4.ff.0.0.bias": { "min": -0.2714252769947052, "max": 0.03427727520465851, "mean": -0.04661863297224045, "std": 0.04059664160013199, "sparsity": 0.0, "shape": [ 4096 ] }, "transformer.layers.1.4.ff.2.weight": { "min": -0.9225524067878723, "max": 0.9647303223609924, "mean": 0.0010189020540565252, "std": 0.04070537909865379, "sparsity": 0.0, "shape": [ 1024, 4096 ] }, "transformer.layers.1.4.ff.2.bias": { "min": -0.1445721685886383, "max": 0.07502365112304688, "mean": -0.009085974656045437, "std": 0.02569437585771084, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.2.1.g": { "min": 0.24001570045948029, "max": 0.7130113244056702, "mean": 0.44724389910697937, "std": 0.059336330741643906, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.2.2.to_q.weight": { "min": -0.27252721786499023, "max": 0.2977474629878998, "mean": 9.076926289708354e-06, "std": 0.03546866402029991, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.2.2.to_q.bias": { "min": -0.11933133751153946, "max": 0.11861857026815414, "mean": 0.000759843154810369, "std": 0.027626313269138336, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.2.2.to_k.weight": { "min": -0.2810227572917938, "max": 0.2797848582267761, "mean": -7.693594670854509e-05, "std": 0.03509817644953728, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.2.2.to_k.bias": { "min": -2.5099942684173584, "max": 2.5219902992248535, "mean": 0.026751244440674782, "std": 0.5868741273880005, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.2.2.to_v.weight": { "min": -0.2210882604122162, "max": 0.27153223752975464, "mean": 2.4560677047702484e-06, "std": 0.030732404440641403, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.2.2.to_v.bias": { "min": -0.0335114523768425, "max": 0.031222868710756302, "mean": 0.00011844941036542878, "std": 0.01240864023566246, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.2.2.to_out.0.weight": { "min": -0.23524264991283417, "max": 0.23183144629001617, "mean": 5.6907440011855215e-05, "std": 0.025696856901049614, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.2.2.to_out.0.bias": { "min": -0.13587476313114166, "max": 0.12763848900794983, "mean": -0.005494903773069382, "std": 0.039958395063877106, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.2.3.g": { "min": 0.35451608896255493, "max": 1.1720539331436157, "mean": 0.7106262445449829, "std": 0.10376716405153275, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.2.4.ff.0.0.weight": { "min": -0.6174105405807495, "max": 0.5545085072517395, "mean": 0.0011598969576880336, "std": 0.04611882567405701, "sparsity": 0.0, "shape": [ 4096, 1024 ] }, "transformer.layers.2.4.ff.0.0.bias": { "min": -0.1878771334886551, "max": 0.024924062192440033, "mean": -0.0348367840051651, "std": 0.028611591085791588, "sparsity": 0.0, "shape": [ 4096 ] }, "transformer.layers.2.4.ff.2.weight": { "min": -1.1316187381744385, "max": 0.971271812915802, "mean": 0.0003585200756788254, "std": 0.0423467643558979, "sparsity": 0.0, "shape": [ 1024, 4096 ] }, "transformer.layers.2.4.ff.2.bias": { "min": -0.5980486869812012, "max": 0.06288419663906097, "mean": -0.0048779072239995, "std": 0.028619417920708656, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.3.1.g": { "min": 0.3752831816673279, "max": 0.9404632449150085, "mean": 0.5925332307815552, "std": 0.0669492781162262, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.3.2.to_q.weight": { "min": -0.3914392590522766, "max": 0.36907821893692017, "mean": 7.118703797459602e-05, "std": 0.03718792647123337, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.3.2.to_q.bias": { "min": -0.11911813914775848, "max": 0.1366533637046814, "mean": 0.0009285138221457601, "std": 0.029234997928142548, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.3.2.to_k.weight": { "min": -0.6193273067474365, "max": 0.5089406967163086, "mean": 1.5145867109822575e-05, "std": 0.036441244184970856, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.3.2.to_k.bias": { "min": -8.18839168548584, "max": 8.790501594543457, "mean": -0.1092919334769249, "std": 1.6991198062896729, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.3.2.to_v.weight": { "min": -0.27663567662239075, "max": 0.23973354697227478, "mean": 5.2983978093834594e-05, "std": 0.032615091651678085, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.3.2.to_v.bias": { "min": -0.05204087495803833, "max": 0.03958116099238396, "mean": 9.567412780597806e-05, "std": 0.012961393222212791, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.3.2.to_out.0.weight": { "min": -0.23071666061878204, "max": 0.234710693359375, "mean": -2.1666935936082155e-05, "std": 0.029391352087259293, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.3.2.to_out.0.bias": { "min": -0.20436595380306244, "max": 0.10555993020534515, "mean": -0.004022484645247459, "std": 0.032626353204250336, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.3.3.g": { "min": 0.3398347795009613, "max": 1.0127081871032715, "mean": 0.7008411884307861, "std": 0.09675740450620651, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.3.4.ff.0.0.weight": { "min": -0.5648741126060486, "max": 0.8332529664039612, "mean": 0.00041526954737491906, "std": 0.04230087623000145, "sparsity": 0.0, "shape": [ 4096, 1024 ] }, "transformer.layers.3.4.ff.0.0.bias": { "min": -0.2118305265903473, "max": 0.030412573367357254, "mean": -0.032187312841415405, "std": 0.026507310569286346, "sparsity": 0.0, "shape": [ 4096 ] }, "transformer.layers.3.4.ff.2.weight": { "min": -0.7544606924057007, "max": 0.718633234500885, "mean": -1.3493583537638187e-05, "std": 0.03684115782380104, "sparsity": 0.0, "shape": [ 1024, 4096 ] }, "transformer.layers.3.4.ff.2.bias": { "min": -0.26357486844062805, "max": 0.10591558367013931, "mean": -0.0030233184807002544, "std": 0.028867946937680244, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.4.1.g": { "min": 0.2842615246772766, "max": 0.6951268911361694, "mean": 0.4995192289352417, "std": 0.04653889685869217, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.4.2.to_q.weight": { "min": -0.2790677845478058, "max": 0.2343253642320633, "mean": -0.00011120391718577594, "std": 0.03876161575317383, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.4.2.to_q.bias": { "min": -0.15418817102909088, "max": 0.12667444348335266, "mean": -0.0022305608727037907, "std": 0.033373840153217316, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.4.2.to_k.weight": { "min": -0.4139367640018463, "max": 0.660070538520813, "mean": -1.9737122784135863e-05, "std": 0.03909851238131523, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.4.2.to_k.bias": { "min": -4.238705635070801, "max": 4.723268985748291, "mean": -0.020462416112422943, "std": 1.0078494548797607, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.4.2.to_v.weight": { "min": -0.24497364461421967, "max": 0.20763254165649414, "mean": 4.4202079152455553e-05, "std": 0.033965613692998886, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.4.2.to_v.bias": { "min": -0.03459359332919121, "max": 0.04478804022073746, "mean": -2.136104740202427e-05, "std": 0.012631777673959732, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.4.2.to_out.0.weight": { "min": -0.20072373747825623, "max": 0.20615817606449127, "mean": -2.975538700411562e-05, "std": 0.031023193150758743, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.4.2.to_out.0.bias": { "min": -0.19997990131378174, "max": 0.11331257969141006, "mean": -0.0029115378856658936, "std": 0.03451942652463913, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.4.3.g": { "min": 0.36702099442481995, "max": 1.0571231842041016, "mean": 0.6706027388572693, "std": 0.06639590114355087, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.4.4.ff.0.0.weight": { "min": -0.3983962833881378, "max": 0.5022679567337036, "mean": -3.846201434498653e-05, "std": 0.04113590717315674, "sparsity": 0.0, "shape": [ 4096, 1024 ] }, "transformer.layers.4.4.ff.0.0.bias": { "min": -0.12814512848854065, "max": 0.02683641016483307, "mean": -0.03054228238761425, "std": 0.02187994495034218, "sparsity": 0.0, "shape": [ 4096 ] }, "transformer.layers.4.4.ff.2.weight": { "min": -0.44913211464881897, "max": 0.433132529258728, "mean": 7.945985271362588e-05, "std": 0.0348953977227211, "sparsity": 0.0, "shape": [ 1024, 4096 ] }, "transformer.layers.4.4.ff.2.bias": { "min": -0.2676845192909241, "max": 0.0728912353515625, "mean": -0.0011024216655641794, "std": 0.023127950727939606, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.5.1.g": { "min": 0.2873011827468872, "max": 0.6852278709411621, "mean": 0.5245736837387085, "std": 0.047536205500364304, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.5.2.to_q.weight": { "min": -0.22235900163650513, "max": 0.2234368920326233, "mean": 1.5712306776549667e-05, "std": 0.0389518178999424, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.5.2.to_q.bias": { "min": -0.13648226857185364, "max": 0.10937032103538513, "mean": 0.00023500403040088713, "std": 0.02922363579273224, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.5.2.to_k.weight": { "min": -0.3750153183937073, "max": 0.4373463988304138, "mean": -9.542611223878339e-06, "std": 0.03928782045841217, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.5.2.to_k.bias": { "min": -3.8463687896728516, "max": 5.000114917755127, "mean": 0.00974472425878048, "std": 0.8453519344329834, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.5.2.to_v.weight": { "min": -0.22320200502872467, "max": 0.2200344353914261, "mean": -1.8790160538628697e-07, "std": 0.03441300988197327, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.5.2.to_v.bias": { "min": -0.04361514747142792, "max": 0.03597420081496239, "mean": -0.0002564755268394947, "std": 0.01208114717155695, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.5.2.to_out.0.weight": { "min": -0.21329528093338013, "max": 0.1889103502035141, "mean": -1.6649610188324004e-05, "std": 0.031539641320705414, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.5.2.to_out.0.bias": { "min": -0.18086224794387817, "max": 0.12070237100124359, "mean": -0.002405309583991766, "std": 0.041269298642873764, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.5.3.g": { "min": 0.4225497245788574, "max": 0.9420632123947144, "mean": 0.6627737283706665, "std": 0.056812334805727005, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.5.4.ff.0.0.weight": { "min": -0.3715151250362396, "max": 0.4758515954017639, "mean": -8.248311496572569e-05, "std": 0.040895167738199234, "sparsity": 0.0, "shape": [ 4096, 1024 ] }, "transformer.layers.5.4.ff.0.0.bias": { "min": -0.20838980376720428, "max": 0.027207521721720695, "mean": -0.030246354639530182, "std": 0.02134900726377964, "sparsity": 0.0, "shape": [ 4096 ] }, "transformer.layers.5.4.ff.2.weight": { "min": -0.3401075303554535, "max": 0.7336291074752808, "mean": 8.389431604882702e-05, "std": 0.034770816564559937, "sparsity": 0.0, "shape": [ 1024, 4096 ] }, "transformer.layers.5.4.ff.2.bias": { "min": -0.24028635025024414, "max": 0.05047708749771118, "mean": -0.001194795360788703, "std": 0.020465141162276268, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.6.1.g": { "min": 0.30595850944519043, "max": 0.6537705063819885, "mean": 0.5251566767692566, "std": 0.04612725228071213, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.6.2.to_q.weight": { "min": -0.30432915687561035, "max": 0.21739104390144348, "mean": 6.996125739533454e-05, "std": 0.03949799761176109, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.6.2.to_q.bias": { "min": -0.14943227171897888, "max": 0.13134317100048065, "mean": 0.00034546080860309303, "std": 0.030460603535175323, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.6.2.to_k.weight": { "min": -0.25738975405693054, "max": 0.20207944512367249, "mean": 3.1017469154903665e-05, "std": 0.03948727250099182, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.6.2.to_k.bias": { "min": -2.336665153503418, "max": 2.376288890838623, "mean": -0.026247629895806313, "std": 0.44984105229377747, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.6.2.to_v.weight": { "min": -0.18894457817077637, "max": 0.21059554815292358, "mean": 3.7193480238784105e-05, "std": 0.034797847270965576, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.6.2.to_v.bias": { "min": -0.03164611756801605, "max": 0.03540992736816406, "mean": -0.00020107367890886962, "std": 0.012292974628508091, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.6.2.to_out.0.weight": { "min": -0.18840090930461884, "max": 0.17046599090099335, "mean": -6.797777314204723e-05, "std": 0.032174453139305115, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.6.2.to_out.0.bias": { "min": -0.13930758833885193, "max": 0.13733482360839844, "mean": -0.002516954904422164, "std": 0.05130286514759064, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.6.3.g": { "min": 0.46718037128448486, "max": 0.9563874006271362, "mean": 0.6689748764038086, "std": 0.05278700590133667, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.6.4.ff.0.0.weight": { "min": -0.3242974579334259, "max": 0.3098086714744568, "mean": -1.3617936929222196e-06, "std": 0.04095118120312691, "sparsity": 0.0, "shape": [ 4096, 1024 ] }, "transformer.layers.6.4.ff.0.0.bias": { "min": -0.12469282001256943, "max": 0.02526070550084114, "mean": -0.030708763748407364, "std": 0.019816862419247627, "sparsity": 0.0, "shape": [ 4096 ] }, "transformer.layers.6.4.ff.2.weight": { "min": -0.4401431083679199, "max": 0.44523754715919495, "mean": 9.650168067310005e-05, "std": 0.03512365743517876, "sparsity": 0.0, "shape": [ 1024, 4096 ] }, "transformer.layers.6.4.ff.2.bias": { "min": -0.22469638288021088, "max": 0.05176383629441261, "mean": -0.0011855906341224909, "std": 0.018477564677596092, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.7.1.g": { "min": 0.3391278088092804, "max": 0.7394291162490845, "mean": 0.5587280988693237, "std": 0.04140337556600571, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.7.2.to_q.weight": { "min": -0.27262935042381287, "max": 0.2784675061702728, "mean": 1.984157097467687e-05, "std": 0.041061654686927795, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.7.2.to_q.bias": { "min": -0.1370200663805008, "max": 0.13985797762870789, "mean": 0.0004876606981270015, "std": 0.026632333174347878, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.7.2.to_k.weight": { "min": -0.49073120951652527, "max": 0.35599616169929504, "mean": 8.872401667758822e-05, "std": 0.040699537843465805, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.7.2.to_k.bias": { "min": -2.2974724769592285, "max": 1.7454196214675903, "mean": -0.021081820130348206, "std": 0.5002042055130005, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.7.2.to_v.weight": { "min": -0.21770520508289337, "max": 0.19793029129505157, "mean": -4.0488688682671636e-05, "std": 0.03423655033111572, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.7.2.to_v.bias": { "min": -0.0412483848631382, "max": 0.038579147309064865, "mean": -0.00014048503362573683, "std": 0.012878325767815113, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.7.2.to_out.0.weight": { "min": -0.17742925882339478, "max": 0.1836576759815216, "mean": 4.762586468132213e-05, "std": 0.031559526920318604, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.7.2.to_out.0.bias": { "min": -0.17993344366550446, "max": 0.18376585841178894, "mean": -0.0022200806997716427, "std": 0.05484066903591156, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.7.3.g": { "min": 0.4742898643016815, "max": 1.0256999731063843, "mean": 0.6453396677970886, "std": 0.05035531893372536, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.7.4.ff.0.0.weight": { "min": -0.27185067534446716, "max": 0.3093453645706177, "mean": 0.00011244456982240081, "std": 0.040687281638383865, "sparsity": 0.0, "shape": [ 4096, 1024 ] }, "transformer.layers.7.4.ff.0.0.bias": { "min": -0.10582341998815536, "max": 0.02690320834517479, "mean": -0.02951919659972191, "std": 0.017931465059518814, "sparsity": 0.0, "shape": [ 4096 ] }, "transformer.layers.7.4.ff.2.weight": { "min": -0.3390185832977295, "max": 0.32922977209091187, "mean": 5.620906449621543e-05, "std": 0.034417424350976944, "sparsity": 0.0, "shape": [ 1024, 4096 ] }, "transformer.layers.7.4.ff.2.bias": { "min": -0.18173733353614807, "max": 0.04227666184306145, "mean": -0.0010707223555073142, "std": 0.017213771119713783, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.8.1.g": { "min": 0.3254404067993164, "max": 0.6867184638977051, "mean": 0.5112515091896057, "std": 0.036953605711460114, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.8.2.to_q.weight": { "min": -0.23387184739112854, "max": 0.22577211260795593, "mean": -3.611366992117837e-05, "std": 0.039180755615234375, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.8.2.to_q.bias": { "min": -0.11533147841691971, "max": 0.13174240291118622, "mean": 0.00015339103993028402, "std": 0.029181061312556267, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.8.2.to_k.weight": { "min": -0.3528231382369995, "max": 0.28539976477622986, "mean": 7.355230991379358e-06, "std": 0.03924909234046936, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.8.2.to_k.bias": { "min": -4.133138179779053, "max": 3.544285774230957, "mean": -0.011592379771173, "std": 0.682723343372345, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.8.2.to_v.weight": { "min": -0.211366206407547, "max": 0.20918519794940948, "mean": 3.47092718584463e-05, "std": 0.03448852524161339, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.8.2.to_v.bias": { "min": -0.03565165773034096, "max": 0.04795990511775017, "mean": 0.0007935892790555954, "std": 0.012854626402258873, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.8.2.to_out.0.weight": { "min": -0.21064111590385437, "max": 0.1932363212108612, "mean": -1.2698478712991346e-06, "std": 0.03169921413064003, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.8.2.to_out.0.bias": { "min": -0.18659119307994843, "max": 0.17711447179317474, "mean": -0.0028428896330296993, "std": 0.05864271521568298, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.8.3.g": { "min": 0.4746437668800354, "max": 1.0418283939361572, "mean": 0.6514592170715332, "std": 0.049664221704006195, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.8.4.ff.0.0.weight": { "min": -0.24862074851989746, "max": 0.3290244936943054, "mean": 0.0001805826323106885, "std": 0.04057569056749344, "sparsity": 0.0, "shape": [ 4096, 1024 ] }, "transformer.layers.8.4.ff.0.0.bias": { "min": -0.12504367530345917, "max": 0.024559227749705315, "mean": -0.030504360795021057, "std": 0.017604367807507515, "sparsity": 0.0, "shape": [ 4096 ] }, "transformer.layers.8.4.ff.2.weight": { "min": -0.42111000418663025, "max": 0.4816901385784149, "mean": -1.4580382412532344e-07, "std": 0.03540220111608505, "sparsity": 0.0, "shape": [ 1024, 4096 ] }, "transformer.layers.8.4.ff.2.bias": { "min": -0.15185561776161194, "max": 0.04354217275977135, "mean": 4.59605835203547e-05, "std": 0.014884229749441147, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.9.1.g": { "min": 0.3155389130115509, "max": 0.6820871829986572, "mean": 0.5529488921165466, "std": 0.04071735590696335, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.9.2.to_q.weight": { "min": -0.20643381774425507, "max": 0.21991202235221863, "mean": 3.090859536314383e-05, "std": 0.03830238804221153, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.9.2.to_q.bias": { "min": -0.13782010972499847, "max": 0.11272551119327545, "mean": 1.9601531676016748e-05, "std": 0.025822695344686508, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.9.2.to_k.weight": { "min": -0.40278956294059753, "max": 0.37109923362731934, "mean": 2.618670441734139e-05, "std": 0.03818415477871895, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.9.2.to_k.bias": { "min": -3.7713959217071533, "max": 2.8690977096557617, "mean": 0.0011573811061680317, "std": 0.5169072151184082, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.9.2.to_v.weight": { "min": -0.20292331278324127, "max": 0.1974206268787384, "mean": 2.9524358978960663e-05, "std": 0.03429995849728584, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.9.2.to_v.bias": { "min": -0.05099167302250862, "max": 0.040043603628873825, "mean": -0.00041941594099625945, "std": 0.01342028472572565, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.9.2.to_out.0.weight": { "min": -0.19649569690227509, "max": 0.20179419219493866, "mean": -1.231730857398361e-05, "std": 0.031807754188776016, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.9.2.to_out.0.bias": { "min": -0.19327867031097412, "max": 0.195101797580719, "mean": -0.002969510853290558, "std": 0.06256763637065887, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.9.3.g": { "min": 0.3495103716850281, "max": 1.0841096639633179, "mean": 0.6672286987304688, "std": 0.055231790989637375, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.9.4.ff.0.0.weight": { "min": -0.22513826191425323, "max": 0.25143498182296753, "mean": 0.00035896283225156367, "std": 0.040764935314655304, "sparsity": 0.0, "shape": [ 4096, 1024 ] }, "transformer.layers.9.4.ff.0.0.bias": { "min": -0.0910005122423172, "max": 0.043744608759880066, "mean": -0.030088767409324646, "std": 0.017610033974051476, "sparsity": 0.0, "shape": [ 4096 ] }, "transformer.layers.9.4.ff.2.weight": { "min": -0.3535524308681488, "max": 0.30403411388397217, "mean": -4.383287887321785e-05, "std": 0.03712723031640053, "sparsity": 0.0, "shape": [ 1024, 4096 ] }, "transformer.layers.9.4.ff.2.bias": { "min": -0.16203957796096802, "max": 0.063482366502285, "mean": -8.168067142833024e-05, "std": 0.019403086975216866, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.10.1.g": { "min": 0.3487982153892517, "max": 0.7220908999443054, "mean": 0.542417049407959, "std": 0.039066411554813385, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.10.2.to_q.weight": { "min": -0.2193998396396637, "max": 0.22306619584560394, "mean": -1.1200094377272762e-05, "std": 0.039234746247529984, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.10.2.to_q.bias": { "min": -0.118410125374794, "max": 0.17068907618522644, "mean": 0.00027954723918810487, "std": 0.02511775679886341, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.10.2.to_k.weight": { "min": -0.2468089461326599, "max": 0.3010835647583008, "mean": -3.6559536965796724e-05, "std": 0.03893429413437843, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.10.2.to_k.bias": { "min": -3.5055222511291504, "max": 3.714968204498291, "mean": 0.015851959586143494, "std": 0.7825093269348145, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.10.2.to_v.weight": { "min": -0.21874836087226868, "max": 0.2377166897058487, "mean": -1.354666437691776e-05, "std": 0.036306966096162796, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.10.2.to_v.bias": { "min": -0.04711933806538582, "max": 0.051407281309366226, "mean": 0.0004819422902073711, "std": 0.013517641462385654, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.10.2.to_out.0.weight": { "min": -0.21396194398403168, "max": 0.2176503837108612, "mean": 5.661203613271937e-05, "std": 0.033618949353694916, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.10.2.to_out.0.bias": { "min": -0.21143636107444763, "max": 0.23150545358657837, "mean": -0.0051071615889668465, "std": 0.061890047043561935, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.10.3.g": { "min": 0.36224639415740967, "max": 1.1013858318328857, "mean": 0.6993460655212402, "std": 0.053608398884534836, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.10.4.ff.0.0.weight": { "min": -0.23459650576114655, "max": 0.2449653446674347, "mean": 0.00046337299863807857, "std": 0.04127378761768341, "sparsity": 0.0, "shape": [ 4096, 1024 ] }, "transformer.layers.10.4.ff.0.0.bias": { "min": -0.09813369810581207, "max": 0.06841138750314713, "mean": -0.03143805265426636, "std": 0.018124299123883247, "sparsity": 0.0, "shape": [ 4096 ] }, "transformer.layers.10.4.ff.2.weight": { "min": -0.3017565906047821, "max": 0.35157960653305054, "mean": -8.145418541971594e-05, "std": 0.04027964174747467, "sparsity": 0.0, "shape": [ 1024, 4096 ] }, "transformer.layers.10.4.ff.2.bias": { "min": -0.15233245491981506, "max": 0.1496550738811493, "mean": 0.0002547369513195008, "std": 0.023038377985358238, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.11.1.g": { "min": 0.999387264251709, "max": 1.0017390251159668, "mean": 1.0002288818359375, "std": 0.0006608659168705344, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.11.2.to_q.weight": { "min": -0.03126532956957817, "max": 0.03126157820224762, "mean": -1.929386235133279e-05, "std": 0.01804366707801819, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.11.2.to_q.bias": { "min": -0.031232358887791634, "max": 0.030991962179541588, "mean": -0.0010843182681128383, "std": 0.01795327477157116, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.11.2.to_k.weight": { "min": -0.031262245029211044, "max": 0.031266022473573685, "mean": 3.54884014086565e-06, "std": 0.01804407499730587, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.11.2.to_k.bias": { "min": -0.03115827776491642, "max": 0.031178638339042664, "mean": 0.00033397332299500704, "std": 0.01806548982858658, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.11.2.to_v.weight": { "min": 0.0, "max": 0.0, "mean": 0.0, "std": 0.0, "sparsity": 1.0, "shape": [ 1024, 1024 ] }, "transformer.layers.11.2.to_v.bias": { "min": 0.0, "max": 0.0, "mean": 0.0, "std": 0.0, "sparsity": 1.0, "shape": [ 1024 ] }, "transformer.layers.11.2.to_out.0.weight": { "min": 0.0, "max": 0.0, "mean": 0.0, "std": 0.0, "sparsity": 1.0, "shape": [ 1024, 1024 ] }, "transformer.layers.11.2.to_out.0.bias": { "min": -0.0002825965639203787, "max": 0.0002991823712363839, "mean": 9.51684285155352e-07, "std": 8.538085967302322e-05, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.11.3.g": { "min": 0.9994979500770569, "max": 1.0022096633911133, "mean": 1.0004006624221802, "std": 0.0006605891394428909, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.11.4.ff.0.0.weight": { "min": -0.03165094926953316, "max": 0.03164109215140343, "mean": -8.348271876457147e-06, "std": 0.018046928569674492, "sparsity": 0.0, "shape": [ 4096, 1024 ] }, "transformer.layers.11.4.ff.0.0.bias": { "min": -0.03134633228182793, "max": 0.031511712819337845, "mean": 0.00030681173666380346, "std": 0.018000956624746323, "sparsity": 0.0, "shape": [ 4096 ] }, "transformer.layers.11.4.ff.2.weight": { "min": -0.00043215902405790985, "max": 0.00046604761155322194, "mean": 6.842553190722356e-09, "std": 8.495857764501125e-05, "sparsity": 0.0, "shape": [ 1024, 4096 ] }, "transformer.layers.11.4.ff.2.bias": { "min": -0.00026889159926213324, "max": 0.0002754697925411165, "mean": -3.8592878581766854e-07, "std": 8.52422381285578e-05, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.12.1.g": { "min": 0.3829193115234375, "max": 0.7194843292236328, "mean": 0.5807508826255798, "std": 0.03887004032731056, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.12.2.to_q.weight": { "min": -0.23809631168842316, "max": 0.1965617835521698, "mean": 2.6561519916867837e-05, "std": 0.03746955841779709, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.12.2.to_q.bias": { "min": -0.11867669969797134, "max": 0.1661195158958435, "mean": 0.0009914024267345667, "std": 0.02754930779337883, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.12.2.to_k.weight": { "min": -0.246256485581398, "max": 0.5006742477416992, "mean": -5.049802712164819e-05, "std": 0.03762722760438919, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.12.2.to_k.bias": { "min": -3.9423649311065674, "max": 3.7695066928863525, "mean": -0.003572166431695223, "std": 0.6814473271369934, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.12.2.to_v.weight": { "min": -0.22738605737686157, "max": 0.2515488564968109, "mean": -1.1636337148956954e-05, "std": 0.03743850067257881, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.12.2.to_v.bias": { "min": -0.07163971662521362, "max": 0.08085085451602936, "mean": -0.0005172090604901314, "std": 0.015671856701374054, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.12.2.to_out.0.weight": { "min": -0.22821125388145447, "max": 0.25809258222579956, "mean": -2.8563266823766753e-05, "std": 0.03542532026767731, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.12.2.to_out.0.bias": { "min": -0.20053939521312714, "max": 0.2151157110929489, "mean": -0.005536144133657217, "std": 0.06835491210222244, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.12.3.g": { "min": 0.4050634801387787, "max": 1.1895967721939087, "mean": 0.7380250096321106, "std": 0.055244140326976776, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.12.4.ff.0.0.weight": { "min": -0.22111129760742188, "max": 0.24610112607479095, "mean": 0.0005211608950048685, "std": 0.04134161397814751, "sparsity": 0.0, "shape": [ 4096, 1024 ] }, "transformer.layers.12.4.ff.0.0.bias": { "min": -0.1035081148147583, "max": 0.02415246143937111, "mean": -0.03267139568924904, "std": 0.0188875924795866, "sparsity": 0.0, "shape": [ 4096 ] }, "transformer.layers.12.4.ff.2.weight": { "min": -0.4493615925312042, "max": 0.4224270284175873, "mean": -0.00043286356958560646, "std": 0.046902477741241455, "sparsity": 0.0, "shape": [ 1024, 4096 ] }, "transformer.layers.12.4.ff.2.bias": { "min": -0.25133273005485535, "max": 0.47000864148139954, "mean": 0.003200301667675376, "std": 0.04454173892736435, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.13.0.weight": { "min": -0.31721800565719604, "max": 0.3333887755870819, "mean": -2.5312700017821044e-05, "std": 0.021290434524416924, "sparsity": 0.0, "shape": [ 1024, 2048 ] }, "transformer.layers.13.1.g": { "min": 0.3246031403541565, "max": 0.6853436231613159, "mean": 0.5710366368293762, "std": 0.04471459612250328, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.13.2.to_q.weight": { "min": -0.16453500092029572, "max": 0.1740685999393463, "mean": -4.849593824474141e-05, "std": 0.03318438306450844, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.13.2.to_q.bias": { "min": -0.1867130845785141, "max": 0.14271552860736847, "mean": 4.246922617312521e-05, "std": 0.02968418225646019, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.13.2.to_k.weight": { "min": -0.3805757164955139, "max": 0.24612776935100555, "mean": -9.95914979284862e-06, "std": 0.03276544809341431, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.13.2.to_k.bias": { "min": -3.655998945236206, "max": 3.29028582572937, "mean": -0.014252795837819576, "std": 0.9852345585823059, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.13.2.to_v.weight": { "min": -0.23509258031845093, "max": 0.24746716022491455, "mean": -1.7896145436679944e-05, "std": 0.041701558977365494, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.13.2.to_v.bias": { "min": -0.0727391242980957, "max": 0.15445110201835632, "mean": 0.0006684996769763529, "std": 0.0251635629683733, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.13.2.to_out.0.weight": { "min": -0.2665387690067291, "max": 0.24852725863456726, "mean": -1.545724444440566e-05, "std": 0.040141962468624115, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.13.2.to_out.0.bias": { "min": -0.1895304173231125, "max": 0.1947212517261505, "mean": -0.0012303038965910673, "std": 0.06668580323457718, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.13.3.g": { "min": 0.32925331592559814, "max": 0.9993983507156372, "mean": 0.7192491888999939, "std": 0.05233968794345856, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.13.4.ff.0.0.weight": { "min": -0.23172250390052795, "max": 0.24564699828624725, "mean": 0.0001827301166485995, "std": 0.040905360132455826, "sparsity": 0.0, "shape": [ 4096, 1024 ] }, "transformer.layers.13.4.ff.0.0.bias": { "min": -0.11416275054216385, "max": 0.01871776208281517, "mean": -0.04247911646962166, "std": 0.018855031579732895, "sparsity": 0.0, "shape": [ 4096 ] }, "transformer.layers.13.4.ff.2.weight": { "min": -0.3899572193622589, "max": 0.4073238670825958, "mean": -2.1964835468679667e-05, "std": 0.048539649695158005, "sparsity": 0.0, "shape": [ 1024, 4096 ] }, "transformer.layers.13.4.ff.2.bias": { "min": -0.6931350827217102, "max": 0.4125315248966217, "mean": 0.0008539482369087636, "std": 0.060291603207588196, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.14.0.weight": { "min": -0.00041267118649557233, "max": 1.0002655982971191, "mean": 0.00048818063805811107, "std": 0.022091196849942207, "sparsity": 0.0, "shape": [ 1024, 2048 ] }, "transformer.layers.14.1.g": { "min": 0.9994122982025146, "max": 1.0017499923706055, "mean": 1.000227689743042, "std": 0.0006477160495705903, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.14.2.to_q.weight": { "min": -0.03126005083322525, "max": 0.03126395121216774, "mean": -2.102299185935408e-05, "std": 0.018035007640719414, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.14.2.to_q.bias": { "min": -0.031219350174069405, "max": 0.031236182898283005, "mean": -0.0006771045736968517, "std": 0.017829518765211105, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.14.2.to_k.weight": { "min": -0.03126310929656029, "max": 0.03126853331923485, "mean": -8.832646017253865e-06, "std": 0.018034033477306366, "sparsity": 9.5367431640625e-07, "shape": [ 1024, 1024 ] }, "transformer.layers.14.2.to_k.bias": { "min": -0.03123609907925129, "max": 0.0312487930059433, "mean": -0.0007298641721718013, "std": 0.0179444570094347, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.14.2.to_v.weight": { "min": 0.0, "max": 0.0, "mean": 0.0, "std": 0.0, "sparsity": 1.0, "shape": [ 1024, 1024 ] }, "transformer.layers.14.2.to_v.bias": { "min": 0.0, "max": 0.0, "mean": 0.0, "std": 0.0, "sparsity": 1.0, "shape": [ 1024 ] }, "transformer.layers.14.2.to_out.0.weight": { "min": 0.0, "max": 0.0, "mean": 0.0, "std": 0.0, "sparsity": 1.0, "shape": [ 1024, 1024 ] }, "transformer.layers.14.2.to_out.0.bias": { "min": -0.00027907025651074946, "max": 0.0002400849189143628, "mean": 2.689231223484967e-06, "std": 8.426107524428517e-05, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.14.3.g": { "min": 0.9995393753051758, "max": 1.00211501121521, "mean": 1.0004167556762695, "std": 0.0006692331517115235, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.14.4.ff.0.0.weight": { "min": -0.031639304012060165, "max": 0.03170545771718025, "mean": 2.9571647246484645e-06, "std": 0.018044477328658104, "sparsity": 0.0, "shape": [ 4096, 1024 ] }, "transformer.layers.14.4.ff.0.0.bias": { "min": -0.031226763501763344, "max": 0.03141167387366295, "mean": 0.0003237017663195729, "std": 0.018078280612826347, "sparsity": 0.0, "shape": [ 4096 ] }, "transformer.layers.14.4.ff.2.weight": { "min": -0.00040698132943362, "max": 0.0004357137659098953, "mean": 1.1018712484656135e-06, "std": 8.384210377698764e-05, "sparsity": 0.0, "shape": [ 1024, 4096 ] }, "transformer.layers.14.4.ff.2.bias": { "min": -0.0002671520342119038, "max": 0.00023483953555114567, "mean": 2.1393277620518347e-06, "std": 8.360463834833354e-05, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.15.0.weight": { "min": -0.23457324504852295, "max": 0.2725456655025482, "mean": 7.03098658050294e-06, "std": 0.018811851739883423, "sparsity": 0.0, "shape": [ 1024, 2048 ] }, "transformer.layers.15.1.g": { "min": 0.32138916850090027, "max": 0.6936908960342407, "mean": 0.5816767811775208, "std": 0.04592788219451904, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.15.2.to_q.weight": { "min": -0.18192121386528015, "max": 0.19770289957523346, "mean": -1.1671071661112364e-05, "std": 0.033187344670295715, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.15.2.to_q.bias": { "min": -0.16075590252876282, "max": 0.12948612868785858, "mean": -0.0010705746244639158, "std": 0.03414509445428848, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.15.2.to_k.weight": { "min": -0.3322606384754181, "max": 0.3115905225276947, "mean": -1.047878777171718e-05, "std": 0.032237909734249115, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.15.2.to_k.bias": { "min": -7.803586006164551, "max": 8.763325691223145, "mean": 0.09346922487020493, "std": 1.6197253465652466, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.15.2.to_v.weight": { "min": -0.23383009433746338, "max": 0.241935133934021, "mean": 4.1345643694512546e-05, "std": 0.04086088761687279, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.15.2.to_v.bias": { "min": -0.0759628489613533, "max": 0.06582564860582352, "mean": 0.0004808574158232659, "std": 0.01941247656941414, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.15.2.to_out.0.weight": { "min": -0.24585530161857605, "max": 0.23399215936660767, "mean": -2.9465345505741425e-06, "std": 0.03943563625216484, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.15.2.to_out.0.bias": { "min": -0.16296693682670593, "max": 0.16089047491550446, "mean": 0.001630417536944151, "std": 0.06527554988861084, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.15.3.g": { "min": 0.5569126605987549, "max": 0.9438663125038147, "mean": 0.7129403352737427, "std": 0.04013482853770256, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.15.4.ff.0.0.weight": { "min": -0.2285519540309906, "max": 0.2551051676273346, "mean": -4.54609798907768e-05, "std": 0.040580034255981445, "sparsity": 0.0, "shape": [ 4096, 1024 ] }, "transformer.layers.15.4.ff.0.0.bias": { "min": -0.1348046511411667, "max": 0.022271839901804924, "mean": -0.04135382920503616, "std": 0.01838485151529312, "sparsity": 0.0, "shape": [ 4096 ] }, "transformer.layers.15.4.ff.2.weight": { "min": -0.4216119349002838, "max": 0.3923768699169159, "mean": -4.429011823958717e-06, "std": 0.047790225595235825, "sparsity": 0.0, "shape": [ 1024, 4096 ] }, "transformer.layers.15.4.ff.2.bias": { "min": -0.6072338819503784, "max": 0.651410698890686, "mean": 0.0015874950913712382, "std": 0.05684793367981911, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.16.0.weight": { "min": -0.2518640160560608, "max": 0.3208119571208954, "mean": -6.068093171052169e-06, "std": 0.019615380093455315, "sparsity": 0.0, "shape": [ 1024, 2048 ] }, "transformer.layers.16.1.g": { "min": 0.35968896746635437, "max": 0.6824969053268433, "mean": 0.5707405805587769, "std": 0.04298046976327896, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.16.2.to_q.weight": { "min": -0.2206181287765503, "max": 0.177145317196846, "mean": -3.474977711448446e-05, "std": 0.034301795065402985, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.16.2.to_q.bias": { "min": -0.16330677270889282, "max": 0.2329079806804657, "mean": 0.0003651169245131314, "std": 0.032845281064510345, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.16.2.to_k.weight": { "min": -0.26389849185943604, "max": 0.23990698158740997, "mean": -5.2482428145594895e-05, "std": 0.033900897949934006, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.16.2.to_k.bias": { "min": -4.855096817016602, "max": 5.091324329376221, "mean": 0.043882716447114944, "std": 1.2292898893356323, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.16.2.to_v.weight": { "min": -0.24652959406375885, "max": 0.25042256712913513, "mean": 7.212234049802646e-05, "std": 0.043991539627313614, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.16.2.to_v.bias": { "min": -0.06257897615432739, "max": 0.05448286980390549, "mean": 0.0006493264227174222, "std": 0.017185840755701065, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.16.2.to_out.0.weight": { "min": -0.2864921987056732, "max": 0.2719077467918396, "mean": -4.989763692719862e-05, "std": 0.04298979416489601, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.16.2.to_out.0.bias": { "min": -0.16073212027549744, "max": 0.17026235163211823, "mean": -0.0028884499333798885, "std": 0.059281300753355026, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.16.3.g": { "min": 0.5197049379348755, "max": 0.9328829050064087, "mean": 0.7135671973228455, "std": 0.038414619863033295, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.16.4.ff.0.0.weight": { "min": -0.23812194168567657, "max": 0.24923060834407806, "mean": 0.0004647884052246809, "std": 0.040460310876369476, "sparsity": 0.0, "shape": [ 4096, 1024 ] }, "transformer.layers.16.4.ff.0.0.bias": { "min": -0.14453770220279694, "max": 0.041513390839099884, "mean": -0.039691261947155, "std": 0.020545845851302147, "sparsity": 0.0, "shape": [ 4096 ] }, "transformer.layers.16.4.ff.2.weight": { "min": -0.5327961444854736, "max": 0.5830419063568115, "mean": 6.150515218905639e-06, "std": 0.04886715114116669, "sparsity": 0.0, "shape": [ 1024, 4096 ] }, "transformer.layers.16.4.ff.2.bias": { "min": -0.5192174911499023, "max": 0.493362694978714, "mean": 0.002359903883188963, "std": 0.05345294252038002, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.17.0.weight": { "min": -0.2736135721206665, "max": 0.31528207659721375, "mean": 1.917778718052432e-06, "std": 0.020052393898367882, "sparsity": 0.0, "shape": [ 1024, 2048 ] }, "transformer.layers.17.1.g": { "min": 0.3661349415779114, "max": 0.7114736437797546, "mean": 0.5932135581970215, "std": 0.045942142605781555, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.17.2.to_q.weight": { "min": -0.2110714167356491, "max": 0.19956345856189728, "mean": 3.0644099751953036e-05, "std": 0.03486814722418785, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.17.2.to_q.bias": { "min": -0.18721066415309906, "max": 0.20390057563781738, "mean": 0.0009557952871546149, "std": 0.031514741480350494, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.17.2.to_k.weight": { "min": -0.2895534336566925, "max": 0.3397268056869507, "mean": -4.745465412270278e-05, "std": 0.034589096903800964, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.17.2.to_k.bias": { "min": -3.877371311187744, "max": 3.3874666690826416, "mean": 0.014458310790359974, "std": 0.8584496378898621, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.17.2.to_v.weight": { "min": -0.2243141233921051, "max": 0.24994920194149017, "mean": -4.160197022429202e-06, "std": 0.04223477095365524, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.17.2.to_v.bias": { "min": -0.055164966732263565, "max": 0.046595554798841476, "mean": -1.914352469611913e-05, "std": 0.01584389992058277, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.17.2.to_out.0.weight": { "min": -0.29299500584602356, "max": 0.29091835021972656, "mean": -7.332260793191381e-06, "std": 0.041949693113565445, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.17.2.to_out.0.bias": { "min": -0.12478315085172653, "max": 0.2593647241592407, "mean": -0.0032380004413425922, "std": 0.05315803363919258, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.17.3.g": { "min": 0.4563468396663666, "max": 0.8445391654968262, "mean": 0.7056366205215454, "std": 0.03522425889968872, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.17.4.ff.0.0.weight": { "min": -0.5114319920539856, "max": 0.34831947088241577, "mean": 0.0003425391623750329, "std": 0.04020523279905319, "sparsity": 0.0, "shape": [ 4096, 1024 ] }, "transformer.layers.17.4.ff.0.0.bias": { "min": -0.18698035180568695, "max": 0.0395214818418026, "mean": -0.039389487355947495, "std": 0.021351324394345284, "sparsity": 0.0, "shape": [ 4096 ] }, "transformer.layers.17.4.ff.2.weight": { "min": -0.5443570613861084, "max": 0.556300938129425, "mean": -7.182909030234441e-05, "std": 0.05074186250567436, "sparsity": 0.0, "shape": [ 1024, 4096 ] }, "transformer.layers.17.4.ff.2.bias": { "min": -0.5117379426956177, "max": 0.6643521785736084, "mean": 0.002444902202114463, "std": 0.04953118786215782, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.18.0.weight": { "min": -0.3325459361076355, "max": 0.26552852988243103, "mean": 3.543416823958978e-06, "std": 0.019390085712075233, "sparsity": 0.0, "shape": [ 1024, 2048 ] }, "transformer.layers.18.1.g": { "min": 0.3221725821495056, "max": 0.7663489580154419, "mean": 0.6510671973228455, "std": 0.045311350375413895, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.18.2.to_q.weight": { "min": -0.24964848160743713, "max": 0.21960312128067017, "mean": -2.564733222243376e-06, "std": 0.0365014374256134, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.18.2.to_q.bias": { "min": -0.3271941542625427, "max": 0.2872978150844574, "mean": -0.0006782531854696572, "std": 0.038559023290872574, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.18.2.to_k.weight": { "min": -0.3100341856479645, "max": 0.36996597051620483, "mean": 6.477468559751287e-05, "std": 0.036241985857486725, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.18.2.to_k.bias": { "min": -4.717563152313232, "max": 5.807804584503174, "mean": 0.037958286702632904, "std": 1.4132274389266968, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.18.2.to_v.weight": { "min": -0.2217635214328766, "max": 0.20596979558467865, "mean": -7.51121697248891e-05, "std": 0.04249033331871033, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.18.2.to_v.bias": { "min": -0.07755438983440399, "max": 0.051571402698755264, "mean": -0.0009240633808076382, "std": 0.016407648101449013, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.18.2.to_out.0.weight": { "min": -0.3310355246067047, "max": 0.32923752069473267, "mean": -4.983477538189618e-06, "std": 0.04279704764485359, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.18.2.to_out.0.bias": { "min": -0.2849341332912445, "max": 0.11188604682683945, "mean": -0.0012093198020011187, "std": 0.04701279476284981, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.18.3.g": { "min": 0.4862992763519287, "max": 0.8870015740394592, "mean": 0.7375336289405823, "std": 0.038240909576416016, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.18.4.ff.0.0.weight": { "min": -0.3614071309566498, "max": 0.2742360532283783, "mean": 5.11927210027352e-05, "std": 0.040651749819517136, "sparsity": 0.0, "shape": [ 4096, 1024 ] }, "transformer.layers.18.4.ff.0.0.bias": { "min": -0.24774602055549622, "max": 0.04635339602828026, "mean": -0.03926930949091911, "std": 0.02325906977057457, "sparsity": 0.0, "shape": [ 4096 ] }, "transformer.layers.18.4.ff.2.weight": { "min": -0.6263424754142761, "max": 0.5970045328140259, "mean": -5.938729736953974e-05, "std": 0.05312504991889, "sparsity": 0.0, "shape": [ 1024, 4096 ] }, "transformer.layers.18.4.ff.2.bias": { "min": -0.7097107172012329, "max": 0.26584240794181824, "mean": 0.0009143413626588881, "std": 0.051234155893325806, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.19.0.weight": { "min": -0.3434857726097107, "max": 0.30358248949050903, "mean": 1.7036518329405226e-07, "std": 0.019139336422085762, "sparsity": 0.0, "shape": [ 1024, 2048 ] }, "transformer.layers.19.1.g": { "min": 0.3498973548412323, "max": 0.782823920249939, "mean": 0.6388742327690125, "std": 0.04923625662922859, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.19.2.to_q.weight": { "min": -0.20567476749420166, "max": 0.20698602497577667, "mean": -5.99086306465324e-05, "std": 0.03769771382212639, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.19.2.to_q.bias": { "min": -0.25861528515815735, "max": 0.2681594491004944, "mean": -0.00040319678373634815, "std": 0.04461444541811943, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.19.2.to_k.weight": { "min": -0.354155570268631, "max": 0.3225230574607849, "mean": -7.215602636279073e-06, "std": 0.03720592334866524, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.19.2.to_k.bias": { "min": -5.261765480041504, "max": 4.204793453216553, "mean": -0.026421742513775826, "std": 1.0068086385726929, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.19.2.to_v.weight": { "min": -0.23872113227844238, "max": 0.24366846680641174, "mean": -2.556562321842648e-05, "std": 0.043214697390794754, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.19.2.to_v.bias": { "min": -0.0623321607708931, "max": 0.056722186505794525, "mean": 0.0003460783918853849, "std": 0.014153210446238518, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.19.2.to_out.0.weight": { "min": -0.4375341534614563, "max": 0.3737650513648987, "mean": 1.4479240235232282e-05, "std": 0.04412652924656868, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.19.2.to_out.0.bias": { "min": -0.0964576005935669, "max": 0.1761614829301834, "mean": -0.0006592521094717085, "std": 0.035152681171894073, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.19.3.g": { "min": 0.4216594994068146, "max": 1.0695232152938843, "mean": 0.7485226988792419, "std": 0.042068321257829666, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.19.4.ff.0.0.weight": { "min": -0.2659566104412079, "max": 0.2967792749404907, "mean": -7.885666127549484e-05, "std": 0.04081219807267189, "sparsity": 0.0, "shape": [ 4096, 1024 ] }, "transformer.layers.19.4.ff.0.0.bias": { "min": -0.18503932654857635, "max": 0.04330001026391983, "mean": -0.03681433945894241, "std": 0.025581127032637596, "sparsity": 0.0, "shape": [ 4096 ] }, "transformer.layers.19.4.ff.2.weight": { "min": -0.4577261507511139, "max": 0.4869215786457062, "mean": 4.5667507947655395e-05, "std": 0.05421961098909378, "sparsity": 0.0, "shape": [ 1024, 4096 ] }, "transformer.layers.19.4.ff.2.bias": { "min": -0.286339670419693, "max": 0.5517974495887756, "mean": -0.0008834124309942126, "std": 0.047834936529397964, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.20.0.weight": { "min": -0.2927553355693817, "max": 0.32282471656799316, "mean": 6.005510840623174e-06, "std": 0.01997239701449871, "sparsity": 0.0, "shape": [ 1024, 2048 ] }, "transformer.layers.20.1.g": { "min": 0.2911321222782135, "max": 0.7601316571235657, "mean": 0.6508502960205078, "std": 0.052130550146102905, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.20.2.to_q.weight": { "min": -0.24379833042621613, "max": 0.26165705919265747, "mean": -5.548093668039655e-06, "std": 0.03961396589875221, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.20.2.to_q.bias": { "min": -0.267425537109375, "max": 0.20018436014652252, "mean": -0.0008745841332711279, "std": 0.05175970122218132, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.20.2.to_k.weight": { "min": -0.27216100692749023, "max": 0.2537060081958771, "mean": 4.9225500333704986e-06, "std": 0.03871043771505356, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.20.2.to_k.bias": { "min": -12.965754508972168, "max": 15.947580337524414, "mean": 0.03322947770357132, "std": 1.9892938137054443, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.20.2.to_v.weight": { "min": -0.20684319734573364, "max": 0.22589777410030365, "mean": -7.25259305909276e-05, "std": 0.040558259934186935, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.20.2.to_v.bias": { "min": -0.06933015584945679, "max": 0.06318464130163193, "mean": 0.00015395943773910403, "std": 0.014743377454578876, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.20.2.to_out.0.weight": { "min": -0.4654809832572937, "max": 0.3203279674053192, "mean": 1.985491326195188e-05, "std": 0.0405937097966671, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.20.2.to_out.0.bias": { "min": -0.06401513516902924, "max": 0.11543548107147217, "mean": 0.0011928649619221687, "std": 0.024708228185772896, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.20.3.g": { "min": 0.37496218085289, "max": 0.9319577217102051, "mean": 0.7510663270950317, "std": 0.04019522666931152, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.20.4.ff.0.0.weight": { "min": -0.27932143211364746, "max": 0.2732137441635132, "mean": -0.00016841731849126518, "std": 0.04100305214524269, "sparsity": 0.0, "shape": [ 4096, 1024 ] }, "transformer.layers.20.4.ff.0.0.bias": { "min": -0.19859075546264648, "max": 0.05119071155786514, "mean": -0.032025426626205444, "std": 0.02508244849741459, "sparsity": 0.0, "shape": [ 4096 ] }, "transformer.layers.20.4.ff.2.weight": { "min": -0.6584441065788269, "max": 0.5357497930526733, "mean": -4.779139635502361e-05, "std": 0.05285602807998657, "sparsity": 0.0, "shape": [ 1024, 4096 ] }, "transformer.layers.20.4.ff.2.bias": { "min": -0.19279344379901886, "max": 0.5823235511779785, "mean": -0.0005150774959474802, "std": 0.04108597710728645, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.21.0.weight": { "min": -0.417548805475235, "max": 0.3718253970146179, "mean": 6.455363291024696e-06, "std": 0.021627577021718025, "sparsity": 0.0, "shape": [ 1024, 2048 ] }, "transformer.layers.21.1.g": { "min": 0.2144990712404251, "max": 0.7469203472137451, "mean": 0.6495254039764404, "std": 0.054346147924661636, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.21.2.to_q.weight": { "min": -0.2095586657524109, "max": 0.19582423567771912, "mean": 4.027899194625206e-05, "std": 0.039461854845285416, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.21.2.to_q.bias": { "min": -0.3295177221298218, "max": 0.25955715775489807, "mean": -0.003232627874240279, "std": 0.056272272020578384, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.21.2.to_k.weight": { "min": -0.20599152147769928, "max": 0.2547609806060791, "mean": 5.4062355047790334e-05, "std": 0.0385642908513546, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.21.2.to_k.bias": { "min": -6.2438578605651855, "max": 6.932709217071533, "mean": 0.0483400858938694, "std": 1.3851662874221802, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.21.2.to_v.weight": { "min": -0.2099662721157074, "max": 0.23050634562969208, "mean": -4.679883659264306e-06, "std": 0.04131751507520676, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.21.2.to_v.bias": { "min": -0.04376063123345375, "max": 0.03601124510169029, "mean": -5.941561539657414e-06, "std": 0.012793137691915035, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.21.2.to_out.0.weight": { "min": -0.39767444133758545, "max": 0.34496286511421204, "mean": -5.524931111722253e-05, "std": 0.04239441454410553, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.21.2.to_out.0.bias": { "min": -0.055049996823072433, "max": 0.06284762173891068, "mean": 0.0003571161942090839, "std": 0.018672263249754906, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.21.3.g": { "min": 0.35070401430130005, "max": 1.045300006866455, "mean": 0.7896326184272766, "std": 0.04874366521835327, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.21.4.ff.0.0.weight": { "min": -0.3336896300315857, "max": 0.38648444414138794, "mean": -0.00016903391224332154, "std": 0.04148908331990242, "sparsity": 0.0, "shape": [ 4096, 1024 ] }, "transformer.layers.21.4.ff.0.0.bias": { "min": -0.15745577216148376, "max": 0.05912669003009796, "mean": -0.03182134032249451, "std": 0.02510516531765461, "sparsity": 0.0, "shape": [ 4096 ] }, "transformer.layers.21.4.ff.2.weight": { "min": -0.6961610317230225, "max": 0.46920138597488403, "mean": -8.453470945823938e-05, "std": 0.051804590970277786, "sparsity": 0.0, "shape": [ 1024, 4096 ] }, "transformer.layers.21.4.ff.2.bias": { "min": -0.2479942888021469, "max": 0.32869523763656616, "mean": -0.00026210874784737825, "std": 0.04145258665084839, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.22.0.weight": { "min": -0.2870560884475708, "max": 0.3504050374031067, "mean": -2.7076764581579482e-06, "std": 0.024242233484983444, "sparsity": 0.0, "shape": [ 1024, 2048 ] }, "transformer.layers.22.1.g": { "min": 0.19671642780303955, "max": 0.779133677482605, "mean": 0.6702357530593872, "std": 0.058674510568380356, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.22.2.to_q.weight": { "min": -0.2289534956216812, "max": 0.23123182356357574, "mean": -2.0453815523069352e-05, "std": 0.040439117699861526, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.22.2.to_q.bias": { "min": -0.22002340853214264, "max": 0.24095596373081207, "mean": 0.0007837469456717372, "std": 0.05583859235048294, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.22.2.to_k.weight": { "min": -0.2165220081806183, "max": 0.22644749283790588, "mean": -7.203388668131083e-05, "std": 0.03937385976314545, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.22.2.to_k.bias": { "min": -8.905970573425293, "max": 9.068842887878418, "mean": -0.001253342255949974, "std": 1.848394513130188, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.22.2.to_v.weight": { "min": -0.2694862186908722, "max": 0.2589434087276459, "mean": 4.364973574411124e-05, "std": 0.038410402834415436, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.22.2.to_v.bias": { "min": -0.05793758109211922, "max": 0.05797392502427101, "mean": 0.0003538080782163888, "std": 0.01471701916307211, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.22.2.to_out.0.weight": { "min": -0.26422733068466187, "max": 0.28839007019996643, "mean": -6.168079562485218e-05, "std": 0.039077237248420715, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.22.2.to_out.0.bias": { "min": -0.04391145706176758, "max": 0.03739985078573227, "mean": -9.783620771486312e-05, "std": 0.013347266241908073, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.22.3.g": { "min": 0.3393727242946625, "max": 1.0925297737121582, "mean": 0.8639394640922546, "std": 0.06387537717819214, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.22.4.ff.0.0.weight": { "min": -0.4232022762298584, "max": 0.41904953122138977, "mean": 0.000313526950776577, "std": 0.043511807918548584, "sparsity": 0.0, "shape": [ 4096, 1024 ] }, "transformer.layers.22.4.ff.0.0.bias": { "min": -0.21472877264022827, "max": 0.1706702560186386, "mean": -0.029442301020026207, "std": 0.03188013657927513, "sparsity": 0.0, "shape": [ 4096 ] }, "transformer.layers.22.4.ff.2.weight": { "min": -0.5987806916236877, "max": 0.5598706007003784, "mean": -0.00014896712673362345, "std": 0.05345924198627472, "sparsity": 0.0, "shape": [ 1024, 4096 ] }, "transformer.layers.22.4.ff.2.bias": { "min": -0.17873696982860565, "max": 0.3771279752254486, "mean": 0.001353989471681416, "std": 0.037307873368263245, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.23.0.weight": { "min": -0.39440417289733887, "max": 0.36891528964042664, "mean": 3.757418380700983e-05, "std": 0.028618069365620613, "sparsity": 0.0, "shape": [ 1024, 2048 ] }, "transformer.layers.23.1.g": { "min": 0.2904903292655945, "max": 0.8274624347686768, "mean": 0.7055505514144897, "std": 0.06785926967859268, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.23.2.to_q.weight": { "min": -0.9264864325523376, "max": 1.0268279314041138, "mean": -2.7663820219459012e-05, "std": 0.04763999581336975, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.23.2.to_q.bias": { "min": -0.8792101740837097, "max": 0.8157498240470886, "mean": -0.00029962146072648466, "std": 0.09555412083864212, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.23.2.to_k.weight": { "min": -0.26960939168930054, "max": 0.24089379608631134, "mean": -2.2403137336368673e-05, "std": 0.038951266556978226, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.23.2.to_k.bias": { "min": -23.743011474609375, "max": 22.851470947265625, "mean": -0.09188262373209, "std": 4.07051944732666, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.23.2.to_v.weight": { "min": -0.2278052568435669, "max": 0.2454863339662552, "mean": -2.561333167250268e-05, "std": 0.0386415459215641, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.23.2.to_v.bias": { "min": -0.060211505740880966, "max": 0.04552706331014633, "mean": -0.00013798139116261154, "std": 0.014687996357679367, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.23.2.to_out.0.weight": { "min": -0.3381482660770416, "max": 0.3747510015964508, "mean": 7.467011528206058e-06, "std": 0.04082018882036209, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.23.2.to_out.0.bias": { "min": -0.04627379775047302, "max": 0.19550754129886627, "mean": 0.00027567092911340296, "std": 0.01355433464050293, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.23.3.g": { "min": 0.3735484182834625, "max": 1.130308985710144, "mean": 0.8902099132537842, "std": 0.06400929391384125, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.23.4.ff.0.0.weight": { "min": -0.4475434124469757, "max": 0.5425565838813782, "mean": 2.4953253159765154e-05, "std": 0.0455789715051651, "sparsity": 0.0, "shape": [ 4096, 1024 ] }, "transformer.layers.23.4.ff.0.0.bias": { "min": -0.223903626203537, "max": 0.08773155510425568, "mean": -0.0320122167468071, "std": 0.03775562718510628, "sparsity": 0.0, "shape": [ 4096 ] }, "transformer.layers.23.4.ff.2.weight": { "min": -0.7259138822555542, "max": 0.6885775923728943, "mean": 3.529630339471623e-05, "std": 0.05179176479578018, "sparsity": 0.0, "shape": [ 1024, 4096 ] }, "transformer.layers.23.4.ff.2.bias": { "min": -0.17448118329048157, "max": 0.2181989699602127, "mean": 3.60202684532851e-05, "std": 0.03176648169755936, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.24.0.weight": { "min": -0.33964094519615173, "max": 0.3732447624206543, "mean": 4.3327472667442635e-05, "std": 0.03413660451769829, "sparsity": 0.0, "shape": [ 1024, 2048 ] }, "transformer.layers.24.1.g": { "min": 0.31786343455314636, "max": 1.2872315645217896, "mean": 0.6015468835830688, "std": 0.08348662406206131, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.24.2.to_q.weight": { "min": -0.2830578088760376, "max": 0.26022711396217346, "mean": -2.739398723861086e-06, "std": 0.03598024696111679, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.24.2.to_q.bias": { "min": -0.2355988472700119, "max": 0.205682173371315, "mean": 0.00023985601728782058, "std": 0.05602918937802315, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.24.2.to_k.weight": { "min": -0.4354512095451355, "max": 0.3249225318431854, "mean": 2.4408442186540924e-05, "std": 0.034124936908483505, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.24.2.to_k.bias": { "min": -5.546271324157715, "max": 7.313862323760986, "mean": -0.007370356470346451, "std": 0.6993649005889893, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.24.2.to_v.weight": { "min": -0.3440709412097931, "max": 0.3629132807254791, "mean": 0.00010299268615199253, "std": 0.04783618077635765, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.24.2.to_v.bias": { "min": -0.07372982054948807, "max": 0.060475897043943405, "mean": 0.0009333858033642173, "std": 0.014939810149371624, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.24.2.to_out.0.weight": { "min": -0.2562521696090698, "max": 0.2865331768989563, "mean": 4.6935901991673745e-06, "std": 0.04156438633799553, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.24.2.to_out.0.bias": { "min": -0.05538477003574371, "max": 0.06286550313234329, "mean": 0.00012986664660274982, "std": 0.007165286689996719, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.24.3.g": { "min": 0.49387338757514954, "max": 1.2207623720169067, "mean": 1.0135465860366821, "std": 0.11748857796192169, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.24.4.ff.0.0.weight": { "min": -1.0939587354660034, "max": 1.0474854707717896, "mean": -4.887886461801827e-05, "std": 0.052416812628507614, "sparsity": 0.0, "shape": [ 4096, 1024 ] }, "transformer.layers.24.4.ff.0.0.bias": { "min": -0.22367814183235168, "max": 0.17331884801387787, "mean": -0.027228882536292076, "std": 0.03631311282515526, "sparsity": 0.0, "shape": [ 4096 ] }, "transformer.layers.24.4.ff.2.weight": { "min": -0.8839902877807617, "max": 0.9222039580345154, "mean": -0.00014613418898079544, "std": 0.0532962903380394, "sparsity": 0.0, "shape": [ 1024, 4096 ] }, "transformer.layers.24.4.ff.2.bias": { "min": -0.17102710902690887, "max": 0.37978917360305786, "mean": 0.0033693695440888405, "std": 0.03987928107380867, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.25.0.weight": { "min": -0.7775930762290955, "max": 0.7230536341667175, "mean": 1.795422940631397e-05, "std": 0.04615578427910805, "sparsity": 0.0, "shape": [ 1024, 2048 ] }, "transformer.layers.25.1.g": { "min": 0.33868706226348877, "max": 1.428168535232544, "mean": 0.948466420173645, "std": 0.206797257065773, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.25.2.to_q.weight": { "min": -1.7458410263061523, "max": 1.7044554948806763, "mean": 0.00022709151380695403, "std": 0.1587017923593521, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.25.2.to_q.bias": { "min": -1.199466586112976, "max": 1.1009190082550049, "mean": -0.009544244036078453, "std": 0.20388931035995483, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.25.2.to_k.weight": { "min": -0.4210292100906372, "max": 0.42779824137687683, "mean": 6.407736509572715e-05, "std": 0.04801918938755989, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.25.2.to_k.bias": { "min": -19.747289657592773, "max": 19.542404174804688, "mean": -0.24833638966083527, "std": 4.7769317626953125, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.25.2.to_v.weight": { "min": -0.32385140657424927, "max": 0.4385547339916229, "mean": -1.1735279258573428e-05, "std": 0.04616609960794449, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.25.2.to_v.bias": { "min": -0.0338931679725647, "max": 0.036946121603250504, "mean": 0.0006420122808776796, "std": 0.012915823608636856, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.25.2.to_out.0.weight": { "min": -0.7037211656570435, "max": 0.668102502822876, "mean": 4.292663652449846e-05, "std": 0.05789082497358322, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.25.2.to_out.0.bias": { "min": -0.07235053181648254, "max": 0.06769613176584244, "mean": -0.0001348661899100989, "std": 0.01290997676551342, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.25.3.g": { "min": 0.38041582703590393, "max": 1.3927761316299438, "mean": 1.06671142578125, "std": 0.21977396309375763, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.25.4.ff.0.0.weight": { "min": -0.6164037585258484, "max": 0.7183761596679688, "mean": 0.00011247429210925475, "std": 0.05802652984857559, "sparsity": 0.0, "shape": [ 4096, 1024 ] }, "transformer.layers.25.4.ff.0.0.bias": { "min": -0.21889959275722504, "max": 0.22502842545509338, "mean": 0.006201672367751598, "std": 0.049709536135196686, "sparsity": 0.0, "shape": [ 4096 ] }, "transformer.layers.25.4.ff.2.weight": { "min": -0.6296432018280029, "max": 0.8894878029823303, "mean": 1.1972185347985942e-05, "std": 0.02354392781853676, "sparsity": 0.0, "shape": [ 1024, 4096 ] }, "transformer.layers.25.4.ff.2.bias": { "min": -0.5068784356117249, "max": 0.47380438446998596, "mean": -0.0030183307826519012, "std": 0.06925629079341888, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.norm_out.g": { "min": 0.5380737781524658, "max": 1.1801798343658447, "mean": 0.7828105092048645, "std": 0.09876621514558792, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.proj_out.weight": { "min": -0.2670763432979584, "max": 0.21297039091587067, "mean": -0.0002238377055618912, "std": 0.05400474742054939, "sparsity": 0.0, "shape": [ 100, 1024 ] }, "transformer.proj_out.bias": { "min": -0.23828226327896118, "max": 0.014816822484135628, "mean": -0.043933507055044174, "std": 0.034287311136722565, "sparsity": 0.0, "shape": [ 100 ] } } }