{ "layer_types": { "transformer": 391 }, "parameter_counts": { "transformer.time_embed.time_mlp.0.weight": 262144, "transformer.time_embed.time_mlp.0.bias": 1024, "transformer.time_embed.time_mlp.2.weight": 1048576, "transformer.time_embed.time_mlp.2.bias": 1024, "transformer.text_embed.text_embed.weight": 254600, "transformer.input_embed.proj.weight": 307200, "transformer.input_embed.proj.bias": 1024, "transformer.input_embed.conv_pos_embed.conv1d.0.weight": 2031616, "transformer.input_embed.conv_pos_embed.conv1d.0.bias": 1024, "transformer.input_embed.conv_pos_embed.conv1d.2.weight": 2031616, "transformer.input_embed.conv_pos_embed.conv1d.2.bias": 1024, "transformer.layers.0.1.g": 1024, "transformer.layers.0.2.to_q.weight": 1048576, "transformer.layers.0.2.to_q.bias": 1024, "transformer.layers.0.2.to_k.weight": 1048576, "transformer.layers.0.2.to_k.bias": 1024, "transformer.layers.0.2.to_v.weight": 1048576, "transformer.layers.0.2.to_v.bias": 1024, "transformer.layers.0.2.to_out.0.weight": 1048576, "transformer.layers.0.2.to_out.0.bias": 1024, "transformer.layers.0.3.g": 1024, "transformer.layers.0.4.ff.0.0.weight": 4194304, "transformer.layers.0.4.ff.0.0.bias": 4096, "transformer.layers.0.4.ff.2.weight": 4194304, "transformer.layers.0.4.ff.2.bias": 1024, "transformer.layers.1.1.g": 1024, "transformer.layers.1.2.to_q.weight": 1048576, "transformer.layers.1.2.to_q.bias": 1024, "transformer.layers.1.2.to_k.weight": 1048576, "transformer.layers.1.2.to_k.bias": 1024, "transformer.layers.1.2.to_v.weight": 1048576, "transformer.layers.1.2.to_v.bias": 1024, "transformer.layers.1.2.to_out.0.weight": 1048576, "transformer.layers.1.2.to_out.0.bias": 1024, "transformer.layers.1.3.g": 1024, "transformer.layers.1.4.ff.0.0.weight": 4194304, "transformer.layers.1.4.ff.0.0.bias": 4096, "transformer.layers.1.4.ff.2.weight": 4194304, "transformer.layers.1.4.ff.2.bias": 1024, "transformer.layers.2.1.g": 1024, "transformer.layers.2.2.to_q.weight": 1048576, "transformer.layers.2.2.to_q.bias": 1024, "transformer.layers.2.2.to_k.weight": 1048576, "transformer.layers.2.2.to_k.bias": 1024, "transformer.layers.2.2.to_v.weight": 1048576, "transformer.layers.2.2.to_v.bias": 1024, "transformer.layers.2.2.to_out.0.weight": 1048576, "transformer.layers.2.2.to_out.0.bias": 1024, "transformer.layers.2.3.g": 1024, "transformer.layers.2.4.ff.0.0.weight": 4194304, "transformer.layers.2.4.ff.0.0.bias": 4096, "transformer.layers.2.4.ff.2.weight": 4194304, "transformer.layers.2.4.ff.2.bias": 1024, "transformer.layers.3.1.g": 1024, "transformer.layers.3.2.to_q.weight": 1048576, "transformer.layers.3.2.to_q.bias": 1024, "transformer.layers.3.2.to_k.weight": 1048576, "transformer.layers.3.2.to_k.bias": 1024, "transformer.layers.3.2.to_v.weight": 1048576, "transformer.layers.3.2.to_v.bias": 1024, "transformer.layers.3.2.to_out.0.weight": 1048576, "transformer.layers.3.2.to_out.0.bias": 1024, "transformer.layers.3.3.g": 1024, "transformer.layers.3.4.ff.0.0.weight": 4194304, "transformer.layers.3.4.ff.0.0.bias": 4096, "transformer.layers.3.4.ff.2.weight": 4194304, "transformer.layers.3.4.ff.2.bias": 1024, "transformer.layers.4.1.g": 1024, "transformer.layers.4.2.to_q.weight": 1048576, "transformer.layers.4.2.to_q.bias": 1024, "transformer.layers.4.2.to_k.weight": 1048576, "transformer.layers.4.2.to_k.bias": 1024, "transformer.layers.4.2.to_v.weight": 1048576, "transformer.layers.4.2.to_v.bias": 1024, "transformer.layers.4.2.to_out.0.weight": 1048576, "transformer.layers.4.2.to_out.0.bias": 1024, "transformer.layers.4.3.g": 1024, "transformer.layers.4.4.ff.0.0.weight": 4194304, "transformer.layers.4.4.ff.0.0.bias": 4096, "transformer.layers.4.4.ff.2.weight": 4194304, "transformer.layers.4.4.ff.2.bias": 1024, "transformer.layers.5.1.g": 1024, "transformer.layers.5.2.to_q.weight": 1048576, "transformer.layers.5.2.to_q.bias": 1024, "transformer.layers.5.2.to_k.weight": 1048576, "transformer.layers.5.2.to_k.bias": 1024, "transformer.layers.5.2.to_v.weight": 1048576, "transformer.layers.5.2.to_v.bias": 1024, "transformer.layers.5.2.to_out.0.weight": 1048576, "transformer.layers.5.2.to_out.0.bias": 1024, "transformer.layers.5.3.g": 1024, "transformer.layers.5.4.ff.0.0.weight": 4194304, "transformer.layers.5.4.ff.0.0.bias": 4096, "transformer.layers.5.4.ff.2.weight": 4194304, "transformer.layers.5.4.ff.2.bias": 1024, "transformer.layers.6.1.g": 1024, "transformer.layers.6.2.to_q.weight": 1048576, "transformer.layers.6.2.to_q.bias": 1024, "transformer.layers.6.2.to_k.weight": 1048576, "transformer.layers.6.2.to_k.bias": 1024, "transformer.layers.6.2.to_v.weight": 1048576, "transformer.layers.6.2.to_v.bias": 1024, "transformer.layers.6.2.to_out.0.weight": 1048576, "transformer.layers.6.2.to_out.0.bias": 1024, "transformer.layers.6.3.g": 1024, "transformer.layers.6.4.ff.0.0.weight": 4194304, "transformer.layers.6.4.ff.0.0.bias": 4096, "transformer.layers.6.4.ff.2.weight": 4194304, "transformer.layers.6.4.ff.2.bias": 1024, "transformer.layers.7.1.g": 1024, "transformer.layers.7.2.to_q.weight": 1048576, "transformer.layers.7.2.to_q.bias": 1024, "transformer.layers.7.2.to_k.weight": 1048576, "transformer.layers.7.2.to_k.bias": 1024, "transformer.layers.7.2.to_v.weight": 1048576, "transformer.layers.7.2.to_v.bias": 1024, "transformer.layers.7.2.to_out.0.weight": 1048576, "transformer.layers.7.2.to_out.0.bias": 1024, "transformer.layers.7.3.g": 1024, "transformer.layers.7.4.ff.0.0.weight": 4194304, "transformer.layers.7.4.ff.0.0.bias": 4096, "transformer.layers.7.4.ff.2.weight": 4194304, "transformer.layers.7.4.ff.2.bias": 1024, "transformer.layers.8.1.g": 1024, "transformer.layers.8.2.to_q.weight": 1048576, "transformer.layers.8.2.to_q.bias": 1024, "transformer.layers.8.2.to_k.weight": 1048576, "transformer.layers.8.2.to_k.bias": 1024, "transformer.layers.8.2.to_v.weight": 1048576, "transformer.layers.8.2.to_v.bias": 1024, "transformer.layers.8.2.to_out.0.weight": 1048576, "transformer.layers.8.2.to_out.0.bias": 1024, "transformer.layers.8.3.g": 1024, "transformer.layers.8.4.ff.0.0.weight": 4194304, "transformer.layers.8.4.ff.0.0.bias": 4096, "transformer.layers.8.4.ff.2.weight": 4194304, "transformer.layers.8.4.ff.2.bias": 1024, "transformer.layers.9.1.g": 1024, "transformer.layers.9.2.to_q.weight": 1048576, "transformer.layers.9.2.to_q.bias": 1024, "transformer.layers.9.2.to_k.weight": 1048576, "transformer.layers.9.2.to_k.bias": 1024, "transformer.layers.9.2.to_v.weight": 1048576, "transformer.layers.9.2.to_v.bias": 1024, "transformer.layers.9.2.to_out.0.weight": 1048576, "transformer.layers.9.2.to_out.0.bias": 1024, "transformer.layers.9.3.g": 1024, "transformer.layers.9.4.ff.0.0.weight": 4194304, "transformer.layers.9.4.ff.0.0.bias": 4096, "transformer.layers.9.4.ff.2.weight": 4194304, "transformer.layers.9.4.ff.2.bias": 1024, "transformer.layers.10.1.g": 1024, "transformer.layers.10.2.to_q.weight": 1048576, "transformer.layers.10.2.to_q.bias": 1024, "transformer.layers.10.2.to_k.weight": 1048576, "transformer.layers.10.2.to_k.bias": 1024, "transformer.layers.10.2.to_v.weight": 1048576, "transformer.layers.10.2.to_v.bias": 1024, "transformer.layers.10.2.to_out.0.weight": 1048576, "transformer.layers.10.2.to_out.0.bias": 1024, "transformer.layers.10.3.g": 1024, "transformer.layers.10.4.ff.0.0.weight": 4194304, "transformer.layers.10.4.ff.0.0.bias": 4096, "transformer.layers.10.4.ff.2.weight": 4194304, "transformer.layers.10.4.ff.2.bias": 1024, "transformer.layers.11.1.g": 1024, "transformer.layers.11.2.to_q.weight": 1048576, "transformer.layers.11.2.to_q.bias": 1024, "transformer.layers.11.2.to_k.weight": 1048576, "transformer.layers.11.2.to_k.bias": 1024, "transformer.layers.11.2.to_v.weight": 1048576, "transformer.layers.11.2.to_v.bias": 1024, "transformer.layers.11.2.to_out.0.weight": 1048576, "transformer.layers.11.2.to_out.0.bias": 1024, "transformer.layers.11.3.g": 1024, "transformer.layers.11.4.ff.0.0.weight": 4194304, "transformer.layers.11.4.ff.0.0.bias": 4096, "transformer.layers.11.4.ff.2.weight": 4194304, "transformer.layers.11.4.ff.2.bias": 1024, "transformer.layers.12.1.g": 1024, "transformer.layers.12.2.to_q.weight": 1048576, "transformer.layers.12.2.to_q.bias": 1024, "transformer.layers.12.2.to_k.weight": 1048576, "transformer.layers.12.2.to_k.bias": 1024, "transformer.layers.12.2.to_v.weight": 1048576, "transformer.layers.12.2.to_v.bias": 1024, "transformer.layers.12.2.to_out.0.weight": 1048576, "transformer.layers.12.2.to_out.0.bias": 1024, "transformer.layers.12.3.g": 1024, "transformer.layers.12.4.ff.0.0.weight": 4194304, "transformer.layers.12.4.ff.0.0.bias": 4096, "transformer.layers.12.4.ff.2.weight": 4194304, "transformer.layers.12.4.ff.2.bias": 1024, "transformer.layers.13.0.weight": 2097152, "transformer.layers.13.1.g": 1024, "transformer.layers.13.2.to_q.weight": 1048576, "transformer.layers.13.2.to_q.bias": 1024, "transformer.layers.13.2.to_k.weight": 1048576, "transformer.layers.13.2.to_k.bias": 1024, "transformer.layers.13.2.to_v.weight": 1048576, "transformer.layers.13.2.to_v.bias": 1024, "transformer.layers.13.2.to_out.0.weight": 1048576, "transformer.layers.13.2.to_out.0.bias": 1024, "transformer.layers.13.3.g": 1024, "transformer.layers.13.4.ff.0.0.weight": 4194304, "transformer.layers.13.4.ff.0.0.bias": 4096, "transformer.layers.13.4.ff.2.weight": 4194304, "transformer.layers.13.4.ff.2.bias": 1024, "transformer.layers.14.0.weight": 2097152, "transformer.layers.14.1.g": 1024, "transformer.layers.14.2.to_q.weight": 1048576, "transformer.layers.14.2.to_q.bias": 1024, "transformer.layers.14.2.to_k.weight": 1048576, "transformer.layers.14.2.to_k.bias": 1024, "transformer.layers.14.2.to_v.weight": 1048576, "transformer.layers.14.2.to_v.bias": 1024, "transformer.layers.14.2.to_out.0.weight": 1048576, "transformer.layers.14.2.to_out.0.bias": 1024, "transformer.layers.14.3.g": 1024, "transformer.layers.14.4.ff.0.0.weight": 4194304, "transformer.layers.14.4.ff.0.0.bias": 4096, "transformer.layers.14.4.ff.2.weight": 4194304, "transformer.layers.14.4.ff.2.bias": 1024, "transformer.layers.15.0.weight": 2097152, "transformer.layers.15.1.g": 1024, "transformer.layers.15.2.to_q.weight": 1048576, "transformer.layers.15.2.to_q.bias": 1024, "transformer.layers.15.2.to_k.weight": 1048576, "transformer.layers.15.2.to_k.bias": 1024, "transformer.layers.15.2.to_v.weight": 1048576, "transformer.layers.15.2.to_v.bias": 1024, "transformer.layers.15.2.to_out.0.weight": 1048576, "transformer.layers.15.2.to_out.0.bias": 1024, "transformer.layers.15.3.g": 1024, "transformer.layers.15.4.ff.0.0.weight": 4194304, "transformer.layers.15.4.ff.0.0.bias": 4096, "transformer.layers.15.4.ff.2.weight": 4194304, "transformer.layers.15.4.ff.2.bias": 1024, "transformer.layers.16.0.weight": 2097152, "transformer.layers.16.1.g": 1024, "transformer.layers.16.2.to_q.weight": 1048576, "transformer.layers.16.2.to_q.bias": 1024, "transformer.layers.16.2.to_k.weight": 1048576, "transformer.layers.16.2.to_k.bias": 1024, "transformer.layers.16.2.to_v.weight": 1048576, "transformer.layers.16.2.to_v.bias": 1024, "transformer.layers.16.2.to_out.0.weight": 1048576, "transformer.layers.16.2.to_out.0.bias": 1024, "transformer.layers.16.3.g": 1024, "transformer.layers.16.4.ff.0.0.weight": 4194304, "transformer.layers.16.4.ff.0.0.bias": 4096, "transformer.layers.16.4.ff.2.weight": 4194304, "transformer.layers.16.4.ff.2.bias": 1024, "transformer.layers.17.0.weight": 2097152, "transformer.layers.17.1.g": 1024, "transformer.layers.17.2.to_q.weight": 1048576, "transformer.layers.17.2.to_q.bias": 1024, "transformer.layers.17.2.to_k.weight": 1048576, "transformer.layers.17.2.to_k.bias": 1024, "transformer.layers.17.2.to_v.weight": 1048576, "transformer.layers.17.2.to_v.bias": 1024, "transformer.layers.17.2.to_out.0.weight": 1048576, "transformer.layers.17.2.to_out.0.bias": 1024, "transformer.layers.17.3.g": 1024, "transformer.layers.17.4.ff.0.0.weight": 4194304, "transformer.layers.17.4.ff.0.0.bias": 4096, "transformer.layers.17.4.ff.2.weight": 4194304, "transformer.layers.17.4.ff.2.bias": 1024, "transformer.layers.18.0.weight": 2097152, "transformer.layers.18.1.g": 1024, "transformer.layers.18.2.to_q.weight": 1048576, "transformer.layers.18.2.to_q.bias": 1024, "transformer.layers.18.2.to_k.weight": 1048576, "transformer.layers.18.2.to_k.bias": 1024, "transformer.layers.18.2.to_v.weight": 1048576, "transformer.layers.18.2.to_v.bias": 1024, "transformer.layers.18.2.to_out.0.weight": 1048576, "transformer.layers.18.2.to_out.0.bias": 1024, "transformer.layers.18.3.g": 1024, "transformer.layers.18.4.ff.0.0.weight": 4194304, "transformer.layers.18.4.ff.0.0.bias": 4096, "transformer.layers.18.4.ff.2.weight": 4194304, "transformer.layers.18.4.ff.2.bias": 1024, "transformer.layers.19.0.weight": 2097152, "transformer.layers.19.1.g": 1024, "transformer.layers.19.2.to_q.weight": 1048576, "transformer.layers.19.2.to_q.bias": 1024, "transformer.layers.19.2.to_k.weight": 1048576, "transformer.layers.19.2.to_k.bias": 1024, "transformer.layers.19.2.to_v.weight": 1048576, "transformer.layers.19.2.to_v.bias": 1024, "transformer.layers.19.2.to_out.0.weight": 1048576, "transformer.layers.19.2.to_out.0.bias": 1024, "transformer.layers.19.3.g": 1024, "transformer.layers.19.4.ff.0.0.weight": 4194304, "transformer.layers.19.4.ff.0.0.bias": 4096, "transformer.layers.19.4.ff.2.weight": 4194304, "transformer.layers.19.4.ff.2.bias": 1024, "transformer.layers.20.0.weight": 2097152, "transformer.layers.20.1.g": 1024, "transformer.layers.20.2.to_q.weight": 1048576, "transformer.layers.20.2.to_q.bias": 1024, "transformer.layers.20.2.to_k.weight": 1048576, "transformer.layers.20.2.to_k.bias": 1024, "transformer.layers.20.2.to_v.weight": 1048576, "transformer.layers.20.2.to_v.bias": 1024, "transformer.layers.20.2.to_out.0.weight": 1048576, "transformer.layers.20.2.to_out.0.bias": 1024, "transformer.layers.20.3.g": 1024, "transformer.layers.20.4.ff.0.0.weight": 4194304, "transformer.layers.20.4.ff.0.0.bias": 4096, "transformer.layers.20.4.ff.2.weight": 4194304, "transformer.layers.20.4.ff.2.bias": 1024, "transformer.layers.21.0.weight": 2097152, "transformer.layers.21.1.g": 1024, "transformer.layers.21.2.to_q.weight": 1048576, "transformer.layers.21.2.to_q.bias": 1024, "transformer.layers.21.2.to_k.weight": 1048576, "transformer.layers.21.2.to_k.bias": 1024, "transformer.layers.21.2.to_v.weight": 1048576, "transformer.layers.21.2.to_v.bias": 1024, "transformer.layers.21.2.to_out.0.weight": 1048576, "transformer.layers.21.2.to_out.0.bias": 1024, "transformer.layers.21.3.g": 1024, "transformer.layers.21.4.ff.0.0.weight": 4194304, "transformer.layers.21.4.ff.0.0.bias": 4096, "transformer.layers.21.4.ff.2.weight": 4194304, "transformer.layers.21.4.ff.2.bias": 1024, "transformer.layers.22.0.weight": 2097152, "transformer.layers.22.1.g": 1024, "transformer.layers.22.2.to_q.weight": 1048576, "transformer.layers.22.2.to_q.bias": 1024, "transformer.layers.22.2.to_k.weight": 1048576, "transformer.layers.22.2.to_k.bias": 1024, "transformer.layers.22.2.to_v.weight": 1048576, "transformer.layers.22.2.to_v.bias": 1024, "transformer.layers.22.2.to_out.0.weight": 1048576, "transformer.layers.22.2.to_out.0.bias": 1024, "transformer.layers.22.3.g": 1024, "transformer.layers.22.4.ff.0.0.weight": 4194304, "transformer.layers.22.4.ff.0.0.bias": 4096, "transformer.layers.22.4.ff.2.weight": 4194304, "transformer.layers.22.4.ff.2.bias": 1024, "transformer.layers.23.0.weight": 2097152, "transformer.layers.23.1.g": 1024, "transformer.layers.23.2.to_q.weight": 1048576, "transformer.layers.23.2.to_q.bias": 1024, "transformer.layers.23.2.to_k.weight": 1048576, "transformer.layers.23.2.to_k.bias": 1024, "transformer.layers.23.2.to_v.weight": 1048576, "transformer.layers.23.2.to_v.bias": 1024, "transformer.layers.23.2.to_out.0.weight": 1048576, "transformer.layers.23.2.to_out.0.bias": 1024, "transformer.layers.23.3.g": 1024, "transformer.layers.23.4.ff.0.0.weight": 4194304, "transformer.layers.23.4.ff.0.0.bias": 4096, "transformer.layers.23.4.ff.2.weight": 4194304, "transformer.layers.23.4.ff.2.bias": 1024, "transformer.layers.24.0.weight": 2097152, "transformer.layers.24.1.g": 1024, "transformer.layers.24.2.to_q.weight": 1048576, "transformer.layers.24.2.to_q.bias": 1024, "transformer.layers.24.2.to_k.weight": 1048576, "transformer.layers.24.2.to_k.bias": 1024, "transformer.layers.24.2.to_v.weight": 1048576, "transformer.layers.24.2.to_v.bias": 1024, "transformer.layers.24.2.to_out.0.weight": 1048576, "transformer.layers.24.2.to_out.0.bias": 1024, "transformer.layers.24.3.g": 1024, "transformer.layers.24.4.ff.0.0.weight": 4194304, "transformer.layers.24.4.ff.0.0.bias": 4096, "transformer.layers.24.4.ff.2.weight": 4194304, "transformer.layers.24.4.ff.2.bias": 1024, "transformer.layers.25.0.weight": 2097152, "transformer.layers.25.1.g": 1024, "transformer.layers.25.2.to_q.weight": 1048576, "transformer.layers.25.2.to_q.bias": 1024, "transformer.layers.25.2.to_k.weight": 1048576, "transformer.layers.25.2.to_k.bias": 1024, "transformer.layers.25.2.to_v.weight": 1048576, "transformer.layers.25.2.to_v.bias": 1024, "transformer.layers.25.2.to_out.0.weight": 1048576, "transformer.layers.25.2.to_out.0.bias": 1024, "transformer.layers.25.3.g": 1024, "transformer.layers.25.4.ff.0.0.weight": 4194304, "transformer.layers.25.4.ff.0.0.bias": 4096, "transformer.layers.25.4.ff.2.weight": 4194304, "transformer.layers.25.4.ff.2.bias": 1024, "transformer.norm_out.g": 1024, "transformer.proj_out.weight": 102400, "transformer.proj_out.bias": 100 }, "important_layers": [ "transformer.time_embed.time_mlp.0.weight", "transformer.time_embed.time_mlp.2.weight", "transformer.text_embed.text_embed.weight", "transformer.input_embed.proj.weight", "transformer.input_embed.conv_pos_embed.conv1d.0.weight", "transformer.input_embed.conv_pos_embed.conv1d.2.weight", "transformer.layers.0.2.to_q.weight", "transformer.layers.0.2.to_k.weight", "transformer.layers.0.2.to_v.weight", "transformer.layers.0.2.to_out.0.weight", "transformer.layers.0.4.ff.0.0.weight", "transformer.layers.0.4.ff.2.weight", "transformer.layers.1.2.to_q.weight", "transformer.layers.1.2.to_k.weight", "transformer.layers.1.2.to_v.weight", "transformer.layers.1.2.to_out.0.weight", "transformer.layers.1.4.ff.0.0.weight", "transformer.layers.1.4.ff.2.weight", "transformer.layers.2.2.to_q.weight", "transformer.layers.2.2.to_k.weight", "transformer.layers.2.2.to_v.weight", "transformer.layers.2.2.to_out.0.weight", "transformer.layers.2.4.ff.0.0.weight", "transformer.layers.2.4.ff.2.weight", "transformer.layers.3.2.to_q.weight", "transformer.layers.3.2.to_k.weight", "transformer.layers.3.2.to_v.weight", "transformer.layers.3.2.to_out.0.weight", "transformer.layers.3.4.ff.0.0.weight", "transformer.layers.3.4.ff.2.weight", "transformer.layers.4.2.to_q.weight", "transformer.layers.4.2.to_k.weight", "transformer.layers.4.2.to_v.weight", "transformer.layers.4.2.to_out.0.weight", "transformer.layers.4.4.ff.0.0.weight", "transformer.layers.4.4.ff.2.weight", "transformer.layers.5.2.to_q.weight", "transformer.layers.5.2.to_k.weight", "transformer.layers.5.2.to_v.weight", "transformer.layers.5.2.to_out.0.weight", "transformer.layers.5.4.ff.0.0.weight", "transformer.layers.5.4.ff.2.weight", "transformer.layers.6.2.to_q.weight", "transformer.layers.6.2.to_k.weight", "transformer.layers.6.2.to_v.weight", "transformer.layers.6.2.to_out.0.weight", "transformer.layers.6.4.ff.0.0.weight", "transformer.layers.6.4.ff.2.weight", "transformer.layers.7.2.to_q.weight", "transformer.layers.7.2.to_k.weight", "transformer.layers.7.2.to_v.weight", "transformer.layers.7.2.to_out.0.weight", "transformer.layers.7.4.ff.0.0.weight", "transformer.layers.7.4.ff.2.weight", "transformer.layers.8.4.ff.0.0.weight", "transformer.layers.8.4.ff.2.weight", "transformer.layers.9.4.ff.0.0.weight", "transformer.layers.9.4.ff.2.weight", "transformer.layers.10.4.ff.0.0.weight", "transformer.layers.10.4.ff.2.weight", "transformer.layers.11.4.ff.0.0.weight", "transformer.layers.11.4.ff.2.weight", "transformer.layers.12.4.ff.0.0.weight", "transformer.layers.12.4.ff.2.weight", "transformer.layers.13.0.weight", "transformer.layers.13.4.ff.0.0.weight", "transformer.layers.13.4.ff.2.weight", "transformer.layers.14.0.weight", "transformer.layers.14.4.ff.0.0.weight", "transformer.layers.14.4.ff.2.weight", "transformer.layers.15.0.weight", "transformer.layers.15.4.ff.0.0.weight", "transformer.layers.15.4.ff.2.weight", "transformer.layers.16.4.ff.0.0.weight", "transformer.layers.16.4.ff.2.weight", "transformer.layers.17.4.ff.0.0.weight", "transformer.layers.17.4.ff.2.weight", "transformer.layers.18.4.ff.0.0.weight", "transformer.layers.18.4.ff.2.weight", "transformer.layers.19.4.ff.0.0.weight", "transformer.layers.19.4.ff.2.weight", "transformer.layers.20.4.ff.0.0.weight", "transformer.layers.20.4.ff.2.weight", "transformer.layers.21.4.ff.0.0.weight", "transformer.layers.21.4.ff.2.weight", "transformer.layers.22.4.ff.0.0.weight", "transformer.layers.22.4.ff.2.weight", "transformer.layers.23.4.ff.0.0.weight", "transformer.layers.23.4.ff.2.weight", "transformer.layers.24.4.ff.0.0.weight", "transformer.layers.24.4.ff.2.weight", "transformer.layers.25.4.ff.0.0.weight", "transformer.layers.25.4.ff.2.weight" ], "bottleneck_layers": [], "recommendations": { "focus_layers": [ "transformer.time_embed.time_mlp.0.weight", "transformer.time_embed.time_mlp.2.weight", "transformer.text_embed.text_embed.weight", "transformer.input_embed.proj.weight", "transformer.input_embed.conv_pos_embed.conv1d.0.weight", "transformer.input_embed.conv_pos_embed.conv1d.2.weight", "transformer.layers.0.2.to_q.weight", "transformer.layers.0.2.to_k.weight", "transformer.layers.0.2.to_v.weight", "transformer.layers.0.2.to_out.0.weight", "transformer.layers.0.4.ff.0.0.weight", "transformer.layers.0.4.ff.2.weight", "transformer.layers.1.2.to_q.weight", "transformer.layers.1.2.to_k.weight", "transformer.layers.1.2.to_v.weight", "transformer.layers.1.2.to_out.0.weight", "transformer.layers.1.4.ff.0.0.weight", "transformer.layers.1.4.ff.2.weight", "transformer.layers.2.2.to_q.weight", "transformer.layers.2.2.to_k.weight", "transformer.layers.2.2.to_v.weight", "transformer.layers.2.2.to_out.0.weight", "transformer.layers.2.4.ff.0.0.weight", "transformer.layers.2.4.ff.2.weight", "transformer.layers.3.2.to_q.weight", "transformer.layers.3.2.to_k.weight", "transformer.layers.3.2.to_v.weight", "transformer.layers.3.2.to_out.0.weight", "transformer.layers.3.4.ff.0.0.weight", "transformer.layers.3.4.ff.2.weight", "transformer.layers.4.2.to_q.weight", "transformer.layers.4.2.to_k.weight", "transformer.layers.4.2.to_v.weight", "transformer.layers.4.2.to_out.0.weight", "transformer.layers.4.4.ff.0.0.weight", "transformer.layers.4.4.ff.2.weight", "transformer.layers.5.2.to_q.weight", "transformer.layers.5.2.to_k.weight", "transformer.layers.5.2.to_v.weight", "transformer.layers.5.2.to_out.0.weight", "transformer.layers.5.4.ff.0.0.weight", "transformer.layers.5.4.ff.2.weight", "transformer.layers.6.2.to_q.weight", "transformer.layers.6.2.to_k.weight", "transformer.layers.6.2.to_v.weight", "transformer.layers.6.2.to_out.0.weight", "transformer.layers.6.4.ff.0.0.weight", "transformer.layers.6.4.ff.2.weight", "transformer.layers.7.2.to_q.weight", "transformer.layers.7.2.to_k.weight", "transformer.layers.7.2.to_v.weight", "transformer.layers.7.2.to_out.0.weight", "transformer.layers.7.4.ff.0.0.weight", "transformer.layers.7.4.ff.2.weight", "transformer.layers.8.4.ff.0.0.weight", "transformer.layers.8.4.ff.2.weight", "transformer.layers.9.4.ff.0.0.weight", "transformer.layers.9.4.ff.2.weight", "transformer.layers.10.4.ff.0.0.weight", "transformer.layers.10.4.ff.2.weight", "transformer.layers.11.4.ff.0.0.weight", "transformer.layers.11.4.ff.2.weight", "transformer.layers.12.4.ff.0.0.weight", "transformer.layers.12.4.ff.2.weight", "transformer.layers.13.0.weight", "transformer.layers.13.4.ff.0.0.weight", "transformer.layers.13.4.ff.2.weight", "transformer.layers.14.0.weight", "transformer.layers.14.4.ff.0.0.weight", "transformer.layers.14.4.ff.2.weight", "transformer.layers.15.0.weight", "transformer.layers.15.4.ff.0.0.weight", "transformer.layers.15.4.ff.2.weight", "transformer.layers.16.4.ff.0.0.weight", "transformer.layers.16.4.ff.2.weight", "transformer.layers.17.4.ff.0.0.weight", "transformer.layers.17.4.ff.2.weight", "transformer.layers.18.4.ff.0.0.weight", "transformer.layers.18.4.ff.2.weight", "transformer.layers.19.4.ff.0.0.weight", "transformer.layers.19.4.ff.2.weight", "transformer.layers.20.4.ff.0.0.weight", "transformer.layers.20.4.ff.2.weight", "transformer.layers.21.4.ff.0.0.weight", "transformer.layers.21.4.ff.2.weight", "transformer.layers.22.4.ff.0.0.weight", "transformer.layers.22.4.ff.2.weight", "transformer.layers.23.4.ff.0.0.weight", "transformer.layers.23.4.ff.2.weight", "transformer.layers.24.4.ff.0.0.weight", "transformer.layers.24.4.ff.2.weight", "transformer.layers.25.4.ff.0.0.weight", "transformer.layers.25.4.ff.2.weight" ] }, "total_parameters": 391, "total_elements": 360755948, "param_ranges": { "transformer.time_embed.time_mlp.0.weight": { "min": -0.43036678433418274, "max": 0.2982814610004425, "mean": -0.0025639168452471495, "std": 0.04256023094058037, "sparsity": 0.0, "shape": [ 1024, 256 ] }, "transformer.time_embed.time_mlp.0.bias": { "min": -0.06307890266180038, "max": 0.10733882337808609, "mean": 0.000591748976148665, "std": 0.034078747034072876, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.time_embed.time_mlp.2.weight": { "min": -0.41281235218048096, "max": 0.8368205428123474, "mean": -0.00020580022828653455, "std": 0.02411011978983879, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.time_embed.time_mlp.2.bias": { "min": -0.11508890986442566, "max": 0.3209010660648346, "mean": -0.0009312849142588675, "std": 0.01954229176044464, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.text_embed.text_embed.weight": { "min": -2.7886247634887695, "max": 2.8676700592041016, "mean": -0.0003673843457363546, "std": 0.6154846549034119, "sparsity": 0.0, "shape": [ 2546, 100 ] }, "transformer.input_embed.proj.weight": { "min": -0.27876999974250793, "max": 0.3816433846950531, "mean": 0.00041971245082095265, "std": 0.0427577942609787, "sparsity": 0.0, "shape": [ 1024, 300 ] }, "transformer.input_embed.proj.bias": { "min": -0.22179193794727325, "max": 0.20910178124904633, "mean": -0.00449436716735363, "std": 0.0408766008913517, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.input_embed.conv_pos_embed.conv1d.0.weight": { "min": -0.4284907877445221, "max": 0.4762955904006958, "mean": 1.3556076510212733e-06, "std": 0.024511976167559624, "sparsity": 0.0, "shape": [ 1024, 64, 31 ] }, "transformer.input_embed.conv_pos_embed.conv1d.0.bias": { "min": -0.32450857758522034, "max": 0.15602749586105347, "mean": -0.04666242375969887, "std": 0.05150512233376503, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.input_embed.conv_pos_embed.conv1d.2.weight": { "min": -0.4105537235736847, "max": 0.35443225502967834, "mean": -0.00012739744852297008, "std": 0.023602385073900223, "sparsity": 0.0, "shape": [ 1024, 64, 31 ] }, "transformer.input_embed.conv_pos_embed.conv1d.2.bias": { "min": -0.22917909920215607, "max": 0.2621273994445801, "mean": -0.029117178171873093, "std": 0.049283698201179504, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.0.1.g": { "min": 0.2544216215610504, "max": 0.8185670971870422, "mean": 0.5252723693847656, "std": 0.08049405366182327, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.0.2.to_q.weight": { "min": -0.2967362403869629, "max": 0.26540544629096985, "mean": -0.0004257934633642435, "std": 0.032104942947626114, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.0.2.to_q.bias": { "min": -0.09282971918582916, "max": 0.12431935220956802, "mean": 0.000645699561573565, "std": 0.02571764960885048, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.0.2.to_k.weight": { "min": -0.2909117043018341, "max": 0.28097161650657654, "mean": -7.593112241011113e-05, "std": 0.030932165682315826, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.0.2.to_k.bias": { "min": -5.890472888946533, "max": 5.805418491363525, "mean": -0.009322225116193295, "std": 1.2942466735839844, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.0.2.to_v.weight": { "min": -0.42496761679649353, "max": 0.3436029851436615, "mean": 9.743953705765307e-05, "std": 0.029953880235552788, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.0.2.to_v.bias": { "min": -0.028933702036738396, "max": 0.027695059776306152, "mean": -0.00032178848050534725, "std": 0.012570273131132126, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.0.2.to_out.0.weight": { "min": -0.45337191224098206, "max": 0.44843629002571106, "mean": 2.4102073439280502e-05, "std": 0.023851700127124786, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.0.2.to_out.0.bias": { "min": -0.08852554112672806, "max": 0.09096554666757584, "mean": 0.0022833123803138733, "std": 0.01949877291917801, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.0.3.g": { "min": 0.2666127681732178, "max": 1.0543620586395264, "mean": 0.5309467911720276, "std": 0.10404026508331299, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.0.4.ff.0.0.weight": { "min": -0.5751341581344604, "max": 0.6088229417800903, "mean": -0.0004320710140746087, "std": 0.0386008694767952, "sparsity": 0.0, "shape": [ 4096, 1024 ] }, "transformer.layers.0.4.ff.0.0.bias": { "min": -0.18247970938682556, "max": 0.04547928646206856, "mean": -0.029448386281728745, "std": 0.04255641624331474, "sparsity": 0.0, "shape": [ 4096 ] }, "transformer.layers.0.4.ff.2.weight": { "min": -1.166790246963501, "max": 1.6334140300750732, "mean": 0.00032607169123366475, "std": 0.02769557386636734, "sparsity": 0.0, "shape": [ 1024, 4096 ] }, "transformer.layers.0.4.ff.2.bias": { "min": -0.16213519871234894, "max": 0.2053978145122528, "mean": -0.021131210029125214, "std": 0.02792428247630596, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.1.1.g": { "min": 0.22390854358673096, "max": 0.8422228693962097, "mean": 0.4874723255634308, "std": 0.0749419778585434, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.1.2.to_q.weight": { "min": -0.2551497519016266, "max": 0.3057706952095032, "mean": -7.631031621713191e-06, "std": 0.03347672149538994, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.1.2.to_q.bias": { "min": -0.09526324272155762, "max": 0.11054196208715439, "mean": 5.9016994782723486e-05, "std": 0.026952214539051056, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.1.2.to_k.weight": { "min": -0.29700320959091187, "max": 0.29560279846191406, "mean": 5.1945076847914606e-05, "std": 0.03254617378115654, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.1.2.to_k.bias": { "min": -5.157034397125244, "max": 5.077272891998291, "mean": -0.014557666145265102, "std": 1.1561598777770996, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.1.2.to_v.weight": { "min": -0.34469008445739746, "max": 0.3430800437927246, "mean": 7.922034274088219e-05, "std": 0.03006283938884735, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.1.2.to_v.bias": { "min": -0.03611171245574951, "max": 0.03316429257392883, "mean": -0.00014332182763610035, "std": 0.013021831400692463, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.1.2.to_out.0.weight": { "min": -0.3155629634857178, "max": 0.3745230734348297, "mean": -2.0780769773409702e-05, "std": 0.024060120806097984, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.1.2.to_out.0.bias": { "min": -0.10523121803998947, "max": 0.12181323021650314, "mean": -0.0019697900861501694, "std": 0.028833730146288872, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.1.3.g": { "min": 0.31127864122390747, "max": 1.118981957435608, "mean": 0.6661038398742676, "std": 0.09739536792039871, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.1.4.ff.0.0.weight": { "min": -0.8734181523323059, "max": 0.6272271275520325, "mean": 0.0016762978630140424, "std": 0.04744264855980873, "sparsity": 0.0, "shape": [ 4096, 1024 ] }, "transformer.layers.1.4.ff.0.0.bias": { "min": -0.27110713720321655, "max": 0.03433133661746979, "mean": -0.04661067947745323, "std": 0.04056624323129654, "sparsity": 0.0, "shape": [ 4096 ] }, "transformer.layers.1.4.ff.2.weight": { "min": -0.9215274453163147, "max": 0.9644713997840881, "mean": 0.0010202918201684952, "std": 0.0407060943543911, "sparsity": 0.0, "shape": [ 1024, 4096 ] }, "transformer.layers.1.4.ff.2.bias": { "min": -0.1444365382194519, "max": 0.07489711046218872, "mean": -0.00908645335584879, "std": 0.02568359486758709, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.2.1.g": { "min": 0.23954921960830688, "max": 0.7114554047584534, "mean": 0.44711926579475403, "std": 0.059072595089673996, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.2.2.to_q.weight": { "min": -0.27211347222328186, "max": 0.29757410287857056, "mean": 9.160639820038341e-06, "std": 0.03547541797161102, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.2.2.to_q.bias": { "min": -0.11930356919765472, "max": 0.1185561791062355, "mean": 0.0007570894667878747, "std": 0.027588583528995514, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.2.2.to_k.weight": { "min": -0.2805509567260742, "max": 0.2793390452861786, "mean": -7.711815123911947e-05, "std": 0.03510286659002304, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.2.2.to_k.bias": { "min": -2.5059573650360107, "max": 2.5179529190063477, "mean": 0.02672126702964306, "std": 0.5862834453582764, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.2.2.to_v.weight": { "min": -0.22094596922397614, "max": 0.27129310369491577, "mean": 2.4950504666776396e-06, "std": 0.030734829604625702, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.2.2.to_v.bias": { "min": -0.03352592885494232, "max": 0.03140881285071373, "mean": 0.00011744203220587224, "std": 0.012399573810398579, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.2.2.to_out.0.weight": { "min": -0.23510752618312836, "max": 0.23160243034362793, "mean": 5.7065204600803554e-05, "std": 0.02570049650967121, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.2.2.to_out.0.bias": { "min": -0.13554446399211884, "max": 0.1277279406785965, "mean": -0.005496564321219921, "std": 0.039924751967191696, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.2.3.g": { "min": 0.3543228507041931, "max": 1.169933795928955, "mean": 0.7103918194770813, "std": 0.10339365899562836, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.2.4.ff.0.0.weight": { "min": -0.6172032952308655, "max": 0.5551565885543823, "mean": 0.0011604262981563807, "std": 0.04612047225236893, "sparsity": 0.0, "shape": [ 4096, 1024 ] }, "transformer.layers.2.4.ff.0.0.bias": { "min": -0.18880973756313324, "max": 0.02472936362028122, "mean": -0.034827686846256256, "std": 0.028596267104148865, "sparsity": 0.0, "shape": [ 4096 ] }, "transformer.layers.2.4.ff.2.weight": { "min": -1.1306864023208618, "max": 0.9699204564094543, "mean": 0.00035697812563739717, "std": 0.0423479862511158, "sparsity": 0.0, "shape": [ 1024, 4096 ] }, "transformer.layers.2.4.ff.2.bias": { "min": -0.5971187949180603, "max": 0.06284646689891815, "mean": -0.00487535959109664, "std": 0.028591454029083252, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.3.1.g": { "min": 0.37525925040245056, "max": 0.938994288444519, "mean": 0.5923536419868469, "std": 0.06656986474990845, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.3.2.to_q.weight": { "min": -0.3912387192249298, "max": 0.3688672184944153, "mean": 7.05350175849162e-05, "std": 0.03718964010477066, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.3.2.to_q.bias": { "min": -0.11892075091600418, "max": 0.13641902804374695, "mean": 0.0009228037670254707, "std": 0.029190916568040848, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.3.2.to_k.weight": { "min": -0.6183786392211914, "max": 0.5081523060798645, "mean": 1.5137170521484222e-05, "std": 0.036442697048187256, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.3.2.to_k.bias": { "min": -8.175475120544434, "max": 8.77673053741455, "mean": -0.10916879773139954, "std": 1.6969348192214966, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.3.2.to_v.weight": { "min": -0.27656111121177673, "max": 0.23974747955799103, "mean": 5.267578671919182e-05, "std": 0.03261591121554375, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.3.2.to_v.bias": { "min": -0.051889754831790924, "max": 0.03952917456626892, "mean": 9.714082989376038e-05, "std": 0.012956415303051472, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.3.2.to_out.0.weight": { "min": -0.23051224648952484, "max": 0.23422203958034515, "mean": -2.1783589545520954e-05, "std": 0.029392505064606667, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.3.2.to_out.0.bias": { "min": -0.20386114716529846, "max": 0.105349101126194, "mean": -0.004017278086394072, "std": 0.032608963549137115, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.3.3.g": { "min": 0.3398902118206024, "max": 1.0104986429214478, "mean": 0.7006295919418335, "std": 0.09645849466323853, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.3.4.ff.0.0.weight": { "min": -0.5644850134849548, "max": 0.8330016136169434, "mean": 0.0004154921043664217, "std": 0.04230193421244621, "sparsity": 0.0, "shape": [ 4096, 1024 ] }, "transformer.layers.3.4.ff.0.0.bias": { "min": -0.21176111698150635, "max": 0.030274739488959312, "mean": -0.03216158226132393, "std": 0.02647627517580986, "sparsity": 0.0, "shape": [ 4096 ] }, "transformer.layers.3.4.ff.2.weight": { "min": -0.7536418437957764, "max": 0.7178125381469727, "mean": -1.392904141539475e-05, "std": 0.03684176877140999, "sparsity": 0.0, "shape": [ 1024, 4096 ] }, "transformer.layers.3.4.ff.2.bias": { "min": -0.2630210220813751, "max": 0.10589547455310822, "mean": -0.0030209862161427736, "std": 0.028848819434642792, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.4.1.g": { "min": 0.2840619385242462, "max": 0.6940633654594421, "mean": 0.4993802607059479, "std": 0.04630398005247116, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.4.2.to_q.weight": { "min": -0.27834540605545044, "max": 0.23377880454063416, "mean": -0.00011083983554271981, "std": 0.03876272216439247, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.4.2.to_q.bias": { "min": -0.15375865995883942, "max": 0.12639263272285461, "mean": -0.002223189687356353, "std": 0.03333896026015282, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.4.2.to_k.weight": { "min": -0.413473516702652, "max": 0.6594987511634827, "mean": -1.9574425095925108e-05, "std": 0.039102163165807724, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.4.2.to_k.bias": { "min": -4.232054233551025, "max": 4.715608596801758, "mean": -0.020489608868956566, "std": 1.0068248510360718, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.4.2.to_v.weight": { "min": -0.24494825303554535, "max": 0.20708487927913666, "mean": 4.434686343302019e-05, "std": 0.03396739438176155, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.4.2.to_v.bias": { "min": -0.034493304789066315, "max": 0.04486649110913277, "mean": -2.654863055795431e-05, "std": 0.012638254091143608, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.4.2.to_out.0.weight": { "min": -0.2005356252193451, "max": 0.2055814564228058, "mean": -3.0033888833713718e-05, "std": 0.031025094911456108, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.4.2.to_out.0.bias": { "min": -0.19959698617458344, "max": 0.11300574988126755, "mean": -0.002902751788496971, "std": 0.03449735790491104, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.4.3.g": { "min": 0.3668424189090729, "max": 1.05502188205719, "mean": 0.6704874634742737, "std": 0.06617505103349686, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.4.4.ff.0.0.weight": { "min": -0.3976363241672516, "max": 0.5017815828323364, "mean": -3.87727704946883e-05, "std": 0.041137050837278366, "sparsity": 0.0, "shape": [ 4096, 1024 ] }, "transformer.layers.4.4.ff.0.0.bias": { "min": -0.12772123515605927, "max": 0.026762252673506737, "mean": -0.03051420859992504, "std": 0.021863147616386414, "sparsity": 0.0, "shape": [ 4096 ] }, "transformer.layers.4.4.ff.2.weight": { "min": -0.44920089840888977, "max": 0.4333121180534363, "mean": 7.599063974339515e-05, "std": 0.034896738827228546, "sparsity": 0.0, "shape": [ 1024, 4096 ] }, "transformer.layers.4.4.ff.2.bias": { "min": -0.2671979069709778, "max": 0.07298687100410461, "mean": -0.0010975392069667578, "std": 0.023116325959563255, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.5.1.g": { "min": 0.28697913885116577, "max": 0.6839067339897156, "mean": 0.5244333744049072, "std": 0.047293804585933685, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.5.2.to_q.weight": { "min": -0.22255805134773254, "max": 0.22290681302547455, "mean": 1.621080627955962e-05, "std": 0.03895403817296028, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.5.2.to_q.bias": { "min": -0.13629747927188873, "max": 0.109336718916893, "mean": 0.0002461877593304962, "std": 0.02917083166539669, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.5.2.to_k.weight": { "min": -0.3738900125026703, "max": 0.43744465708732605, "mean": -9.668656275607646e-06, "std": 0.03929208964109421, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.5.2.to_k.bias": { "min": -3.840332269668579, "max": 4.992400646209717, "mean": 0.009748304262757301, "std": 0.8444803953170776, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.5.2.to_v.weight": { "min": -0.22292070090770721, "max": 0.21977820992469788, "mean": -4.448638719622977e-07, "std": 0.03441440686583519, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.5.2.to_v.bias": { "min": -0.04357949644327164, "max": 0.03590534254908562, "mean": -0.000258232990745455, "std": 0.012078864499926567, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.5.2.to_out.0.weight": { "min": -0.21297886967658997, "max": 0.18814441561698914, "mean": -1.71422834682744e-05, "std": 0.031540658324956894, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.5.2.to_out.0.bias": { "min": -0.1805071383714676, "max": 0.12073972076177597, "mean": -0.00239769509062171, "std": 0.04125608131289482, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.5.3.g": { "min": 0.4227307438850403, "max": 0.9400621056556702, "mean": 0.662601888179779, "std": 0.056538671255111694, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.5.4.ff.0.0.weight": { "min": -0.37151503562927246, "max": 0.4761146008968353, "mean": -8.195374539354816e-05, "std": 0.040896203368902206, "sparsity": 0.0, "shape": [ 4096, 1024 ] }, "transformer.layers.5.4.ff.0.0.bias": { "min": -0.20797580480575562, "max": 0.027151037007570267, "mean": -0.030222713947296143, "std": 0.021336952224373817, "sparsity": 0.0, "shape": [ 4096 ] }, "transformer.layers.5.4.ff.2.weight": { "min": -0.33968234062194824, "max": 0.7333835959434509, "mean": 8.077031816355884e-05, "std": 0.034772153943777084, "sparsity": 0.0, "shape": [ 1024, 4096 ] }, "transformer.layers.5.4.ff.2.bias": { "min": -0.23987196385860443, "max": 0.05037139728665352, "mean": -0.0011877692304551601, "std": 0.020454443991184235, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.6.1.g": { "min": 0.30607396364212036, "max": 0.652435839176178, "mean": 0.5250428915023804, "std": 0.04590361937880516, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.6.2.to_q.weight": { "min": -0.3039066791534424, "max": 0.21754606068134308, "mean": 7.030011329334229e-05, "std": 0.03950100764632225, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.6.2.to_q.bias": { "min": -0.14914348721504211, "max": 0.13110090792179108, "mean": 0.00035085732815787196, "std": 0.030418941751122475, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.6.2.to_k.weight": { "min": -0.2568054795265198, "max": 0.20193904638290405, "mean": 3.147923416690901e-05, "std": 0.03949080780148506, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.6.2.to_k.bias": { "min": -2.3329901695251465, "max": 2.3725619316101074, "mean": -0.02622254565358162, "std": 0.4494195282459259, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.6.2.to_v.weight": { "min": -0.18853308260440826, "max": 0.2103482335805893, "mean": 3.745816502487287e-05, "std": 0.03479913994669914, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.6.2.to_v.bias": { "min": -0.03156094253063202, "max": 0.035385265946388245, "mean": -0.0001973491598619148, "std": 0.012292337603867054, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.6.2.to_out.0.weight": { "min": -0.1882481426000595, "max": 0.17012155055999756, "mean": -6.810311606386676e-05, "std": 0.03217574581503868, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.6.2.to_out.0.bias": { "min": -0.13900111615657806, "max": 0.13692621886730194, "mean": -0.002514890395104885, "std": 0.051281191408634186, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.6.3.g": { "min": 0.46707433462142944, "max": 0.9541991353034973, "mean": 0.6688030958175659, "std": 0.052486222237348557, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.6.4.ff.0.0.weight": { "min": -0.32425403594970703, "max": 0.30980852246284485, "mean": -1.290425643674098e-06, "std": 0.040951915085315704, "sparsity": 0.0, "shape": [ 4096, 1024 ] }, "transformer.layers.6.4.ff.0.0.bias": { "min": -0.12465585768222809, "max": 0.02537902072072029, "mean": -0.030681122094392776, "std": 0.0198006983846426, "sparsity": 0.0, "shape": [ 4096 ] }, "transformer.layers.6.4.ff.2.weight": { "min": -0.43958571553230286, "max": 0.44490763545036316, "mean": 9.539163875160739e-05, "std": 0.0351250097155571, "sparsity": 0.0, "shape": [ 1024, 4096 ] }, "transformer.layers.6.4.ff.2.bias": { "min": -0.2243558019399643, "max": 0.0517578125, "mean": -0.0011802279623225331, "std": 0.018464019522070885, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.7.1.g": { "min": 0.33896756172180176, "max": 0.7381694912910461, "mean": 0.5586157441139221, "std": 0.04119841381907463, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.7.2.to_q.weight": { "min": -0.27227938175201416, "max": 0.27836883068084717, "mean": 1.999387313844636e-05, "std": 0.041062600910663605, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.7.2.to_q.bias": { "min": -0.13660800457000732, "max": 0.1392778903245926, "mean": 0.0004841584013774991, "std": 0.02658114954829216, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.7.2.to_k.weight": { "min": -0.4896349310874939, "max": 0.3551800847053528, "mean": 8.872073522070423e-05, "std": 0.04069973900914192, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.7.2.to_k.bias": { "min": -2.293769121170044, "max": 1.742555856704712, "mean": -0.02106180600821972, "std": 0.49974092841148376, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.7.2.to_v.weight": { "min": -0.2175416797399521, "max": 0.19781090319156647, "mean": -4.052485746797174e-05, "std": 0.03423763066530228, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.7.2.to_v.bias": { "min": -0.04145532101392746, "max": 0.038727227598428726, "mean": -0.00013765225594397634, "std": 0.012874336913228035, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.7.2.to_out.0.weight": { "min": -0.177314892411232, "max": 0.1832207590341568, "mean": 4.75629567517899e-05, "std": 0.03156043216586113, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.7.2.to_out.0.bias": { "min": -0.1798381805419922, "max": 0.18348462879657745, "mean": -0.002212759107351303, "std": 0.054820165038108826, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.7.3.g": { "min": 0.4742435812950134, "max": 1.0238897800445557, "mean": 0.6451865434646606, "std": 0.05008064582943916, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.7.4.ff.0.0.weight": { "min": -0.2714613080024719, "max": 0.3092961311340332, "mean": 0.00011265614011790603, "std": 0.04068758338689804, "sparsity": 0.0, "shape": [ 4096, 1024 ] }, "transformer.layers.7.4.ff.0.0.bias": { "min": -0.1055683121085167, "max": 0.026772309094667435, "mean": -0.029506118968129158, "std": 0.017915068194270134, "sparsity": 0.0, "shape": [ 4096 ] }, "transformer.layers.7.4.ff.2.weight": { "min": -0.33880147337913513, "max": 0.3287900686264038, "mean": 5.556903124670498e-05, "std": 0.03441847860813141, "sparsity": 0.0, "shape": [ 1024, 4096 ] }, "transformer.layers.7.4.ff.2.bias": { "min": -0.18144568800926208, "max": 0.04239530488848686, "mean": -0.001068950048647821, "std": 0.017201630398631096, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.8.1.g": { "min": 0.3253972828388214, "max": 0.68559730052948, "mean": 0.5111000537872314, "std": 0.03672371804714203, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.8.2.to_q.weight": { "min": -0.23373860120773315, "max": 0.22572296857833862, "mean": -3.580976772354916e-05, "std": 0.039181455969810486, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.8.2.to_q.bias": { "min": -0.11530666053295135, "max": 0.1317266821861267, "mean": 0.00015847355825826526, "std": 0.029152128845453262, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.8.2.to_k.weight": { "min": -0.3521575629711151, "max": 0.2847552001476288, "mean": 7.120977898011915e-06, "std": 0.039250005036592484, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.8.2.to_k.bias": { "min": -4.126590728759766, "max": 3.538623332977295, "mean": -0.01155401673167944, "std": 0.6819069385528564, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.8.2.to_v.weight": { "min": -0.21105211973190308, "max": 0.20891818404197693, "mean": 3.4748343750834465e-05, "std": 0.03448968380689621, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.8.2.to_v.bias": { "min": -0.03559347987174988, "max": 0.04803197458386421, "mean": 0.0007964627584442496, "std": 0.012855397537350655, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.8.2.to_out.0.weight": { "min": -0.21038679778575897, "max": 0.1929050087928772, "mean": -1.3255728390504373e-06, "std": 0.0317002572119236, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.8.2.to_out.0.bias": { "min": -0.18651214241981506, "max": 0.17674075067043304, "mean": -0.002840832807123661, "std": 0.05859901383519173, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.8.3.g": { "min": 0.4748058021068573, "max": 1.0396208763122559, "mean": 0.6513342261314392, "std": 0.049332328140735626, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.8.4.ff.0.0.weight": { "min": -0.2482759803533554, "max": 0.3290877640247345, "mean": 0.00018071771773975343, "std": 0.04057670012116432, "sparsity": 0.0, "shape": [ 4096, 1024 ] }, "transformer.layers.8.4.ff.0.0.bias": { "min": -0.12517917156219482, "max": 0.02484654076397419, "mean": -0.030485937371850014, "std": 0.017585651949048042, "sparsity": 0.0, "shape": [ 4096 ] }, "transformer.layers.8.4.ff.2.weight": { "min": -0.42004328966140747, "max": 0.48050060868263245, "mean": -1.1724823707481846e-06, "std": 0.03540315851569176, "sparsity": 0.0, "shape": [ 1024, 4096 ] }, "transformer.layers.8.4.ff.2.bias": { "min": -0.15136678516864777, "max": 0.04356072470545769, "mean": 4.775111301569268e-05, "std": 0.014870403334498405, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.9.1.g": { "min": 0.3155934810638428, "max": 0.6807596683502197, "mean": 0.5528346300125122, "std": 0.04051977023482323, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.9.2.to_q.weight": { "min": -0.2063884735107422, "max": 0.21910899877548218, "mean": 3.103859489783645e-05, "std": 0.038303472101688385, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.9.2.to_q.bias": { "min": -0.13769029080867767, "max": 0.1125277578830719, "mean": 1.9220009562559426e-05, "std": 0.02578623965382576, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.9.2.to_k.weight": { "min": -0.40236374735832214, "max": 0.37038296461105347, "mean": 2.613713513710536e-05, "std": 0.03818493336439133, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.9.2.to_k.bias": { "min": -3.7654759883880615, "max": 2.864607572555542, "mean": 0.0011372193694114685, "std": 0.51633220911026, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.9.2.to_v.weight": { "min": -0.20273104310035706, "max": 0.1974526047706604, "mean": 2.9206170438556e-05, "std": 0.034301165491342545, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.9.2.to_v.bias": { "min": -0.05080447345972061, "max": 0.0398997887969017, "mean": -0.00042000875691883266, "std": 0.013411123305559158, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.9.2.to_out.0.weight": { "min": -0.19611378014087677, "max": 0.20161780714988708, "mean": -1.2710506780422293e-05, "std": 0.03180883079767227, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.9.2.to_out.0.bias": { "min": -0.19288454949855804, "max": 0.1946749985218048, "mean": -0.002961306367069483, "std": 0.06252170354127884, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.9.3.g": { "min": 0.3495900332927704, "max": 1.0818731784820557, "mean": 0.6670873165130615, "std": 0.054898131638765335, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.9.4.ff.0.0.weight": { "min": -0.22497375309467316, "max": 0.25112366676330566, "mean": 0.00035900043440051377, "std": 0.04076608642935753, "sparsity": 0.0, "shape": [ 4096, 1024 ] }, "transformer.layers.9.4.ff.0.0.bias": { "min": -0.09095952659845352, "max": 0.0440162755548954, "mean": -0.030070394277572632, "std": 0.017598489299416542, "sparsity": 0.0, "shape": [ 4096 ] }, "transformer.layers.9.4.ff.2.weight": { "min": -0.35297849774360657, "max": 0.3037008047103882, "mean": -4.511567021836527e-05, "std": 0.03712863847613335, "sparsity": 0.0, "shape": [ 1024, 4096 ] }, "transformer.layers.9.4.ff.2.bias": { "min": -0.1615627110004425, "max": 0.06344226002693176, "mean": -7.402076153084636e-05, "std": 0.019400237128138542, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.10.1.g": { "min": 0.3484867811203003, "max": 0.7205584049224854, "mean": 0.5422928333282471, "std": 0.03884059190750122, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.10.2.to_q.weight": { "min": -0.21938610076904297, "max": 0.223092183470726, "mean": -1.1128584446851164e-05, "std": 0.0392366424202919, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.10.2.to_q.bias": { "min": -0.11822070181369781, "max": 0.1703757494688034, "mean": 0.0002712813438847661, "std": 0.025094762444496155, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.10.2.to_k.weight": { "min": -0.2461908757686615, "max": 0.3006460666656494, "mean": -3.654139436548576e-05, "std": 0.03893598914146423, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.10.2.to_k.bias": { "min": -3.499889850616455, "max": 3.708961009979248, "mean": 0.01583799161016941, "std": 0.781475305557251, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.10.2.to_v.weight": { "min": -0.21841062605381012, "max": 0.23724044859409332, "mean": -1.4060610737942625e-05, "std": 0.03630809485912323, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.10.2.to_v.bias": { "min": -0.04710822552442551, "max": 0.05138855054974556, "mean": 0.00048449443420395255, "std": 0.013518092222511768, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.10.2.to_out.0.weight": { "min": -0.21374864876270294, "max": 0.2171718180179596, "mean": 5.6465847592335194e-05, "std": 0.03361979499459267, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.10.2.to_out.0.bias": { "min": -0.21103325486183167, "max": 0.2311553806066513, "mean": -0.005100366659462452, "std": 0.06185431033372879, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.10.3.g": { "min": 0.36209091544151306, "max": 1.0989015102386475, "mean": 0.6992126703262329, "std": 0.053264226764440536, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.10.4.ff.0.0.weight": { "min": -0.23423242568969727, "max": 0.24471710622310638, "mean": 0.00046349214971996844, "std": 0.04127512127161026, "sparsity": 0.0, "shape": [ 4096, 1024 ] }, "transformer.layers.10.4.ff.0.0.bias": { "min": -0.09780248254537582, "max": 0.06824193894863129, "mean": -0.031424038112163544, "std": 0.018106156960129738, "sparsity": 0.0, "shape": [ 4096 ] }, "transformer.layers.10.4.ff.2.weight": { "min": -0.301416277885437, "max": 0.35142549872398376, "mean": -8.288547542179003e-05, "std": 0.04028111323714256, "sparsity": 0.0, "shape": [ 1024, 4096 ] }, "transformer.layers.10.4.ff.2.bias": { "min": -0.15196339786052704, "max": 0.14944323897361755, "mean": 0.0002634537231642753, "std": 0.023027226328849792, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.11.1.g": { "min": 1.0, "max": 1.0, "mean": 1.0, "std": 0.0, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.11.2.to_q.weight": { "min": -0.031249936670064926, "max": 0.031249839812517166, "mean": -1.9292721844976768e-05, "std": 0.01804409734904766, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.11.2.to_q.bias": { "min": -0.031226642429828644, "max": 0.03100142627954483, "mean": -0.0010842883493751287, "std": 0.01795371063053608, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.11.2.to_k.weight": { "min": -0.031249966472387314, "max": 0.031249895691871643, "mean": 3.5441100862954045e-06, "std": 0.018044503405690193, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.11.2.to_k.bias": { "min": -0.031156372278928757, "max": 0.031184475868940353, "mean": 0.0003338930255267769, "std": 0.018065759912133217, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.11.2.to_v.weight": { "min": 0.0, "max": 0.0, "mean": 0.0, "std": 0.0, "sparsity": 1.0, "shape": [ 1024, 1024 ] }, "transformer.layers.11.2.to_v.bias": { "min": 0.0, "max": 0.0, "mean": 0.0, "std": 0.0, "sparsity": 1.0, "shape": [ 1024 ] }, "transformer.layers.11.2.to_out.0.weight": { "min": 0.0, "max": 0.0, "mean": 0.0, "std": 0.0, "sparsity": 1.0, "shape": [ 1024, 1024 ] }, "transformer.layers.11.2.to_out.0.bias": { "min": -0.0003838505072053522, "max": 0.00040078736492432654, "mean": 7.502898370148614e-06, "std": 0.00012165026419097558, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.11.3.g": { "min": 0.9996746778488159, "max": 1.0017435550689697, "mean": 1.0005855560302734, "std": 0.0003091032849624753, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.11.4.ff.0.0.weight": { "min": -0.03248094022274017, "max": 0.03274688497185707, "mean": -1.2105063433409669e-05, "std": 0.01805892214179039, "sparsity": 0.0, "shape": [ 4096, 1024 ] }, "transformer.layers.11.4.ff.0.0.bias": { "min": -0.031171226873993874, "max": 0.03214619308710098, "mean": 0.0004906345857307315, "std": 0.017989112064242363, "sparsity": 0.0, "shape": [ 4096 ] }, "transformer.layers.11.4.ff.2.weight": { "min": -0.0009105296921916306, "max": 0.001230148016475141, "mean": 2.7432847673480865e-06, "std": 0.0001725118636386469, "sparsity": 0.0, "shape": [ 1024, 4096 ] }, "transformer.layers.11.4.ff.2.bias": { "min": -0.00036263937363401055, "max": 0.00041731935925781727, "mean": 7.396344699373003e-06, "std": 0.00011976793757639825, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.12.1.g": { "min": 0.38287991285324097, "max": 0.7182613015174866, "mean": 0.5806185603141785, "std": 0.03863256797194481, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.12.2.to_q.weight": { "min": -0.23785854876041412, "max": 0.19614756107330322, "mean": 2.640879392856732e-05, "std": 0.037470731884241104, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.12.2.to_q.bias": { "min": -0.11855358630418777, "max": 0.16578993201255798, "mean": 0.0009884096216410398, "std": 0.027530910447239876, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.12.2.to_k.weight": { "min": -0.2458752989768982, "max": 0.500349223613739, "mean": -5.065255027147941e-05, "std": 0.03762831538915634, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.12.2.to_k.bias": { "min": -3.936182975769043, "max": 3.763556957244873, "mean": -0.003569458145648241, "std": 0.6807414293289185, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.12.2.to_v.weight": { "min": -0.22705353796482086, "max": 0.251341313123703, "mean": -1.142405926657375e-05, "std": 0.03743990138173103, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.12.2.to_v.bias": { "min": -0.07149660587310791, "max": 0.08067727833986282, "mean": -0.0005162369925528765, "std": 0.015656527131795883, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.12.2.to_out.0.weight": { "min": -0.22786642611026764, "max": 0.2578106224536896, "mean": -2.8714632207993418e-05, "std": 0.035426877439022064, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.12.2.to_out.0.bias": { "min": -0.20022797584533691, "max": 0.21474605798721313, "mean": -0.005530310794711113, "std": 0.0683104544878006, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.12.3.g": { "min": 0.4048909544944763, "max": 1.1872107982635498, "mean": 0.7378276586532593, "std": 0.05486491322517395, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.12.4.ff.0.0.weight": { "min": -0.22101044654846191, "max": 0.2458520382642746, "mean": 0.0005211633397266269, "std": 0.04134228080511093, "sparsity": 0.0, "shape": [ 4096, 1024 ] }, "transformer.layers.12.4.ff.0.0.bias": { "min": -0.10363762825727463, "max": 0.023918237537145615, "mean": -0.03266144543886185, "std": 0.018866004422307014, "sparsity": 0.0, "shape": [ 4096 ] }, "transformer.layers.12.4.ff.2.weight": { "min": -0.4487850069999695, "max": 0.42181524634361267, "mean": -0.00043266150169074535, "std": 0.04690360650420189, "sparsity": 0.0, "shape": [ 1024, 4096 ] }, "transformer.layers.12.4.ff.2.bias": { "min": -0.25105422735214233, "max": 0.46941903233528137, "mean": 0.003198462538421154, "std": 0.044503308832645416, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.13.0.weight": { "min": -0.3172111511230469, "max": 0.33329516649246216, "mean": -2.550867066020146e-05, "std": 0.021290993317961693, "sparsity": 0.0, "shape": [ 1024, 2048 ] }, "transformer.layers.13.1.g": { "min": 0.32461607456207275, "max": 0.6840938329696655, "mean": 0.5709556341171265, "std": 0.04454263672232628, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.13.2.to_q.weight": { "min": -0.16456733644008636, "max": 0.17394505441188812, "mean": -4.8416688514407724e-05, "std": 0.03318499028682709, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.13.2.to_q.bias": { "min": -0.1864674687385559, "max": 0.14258594810962677, "mean": 3.8281112210825086e-05, "std": 0.029655346646904945, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.13.2.to_k.weight": { "min": -0.3803539276123047, "max": 0.2457817941904068, "mean": -1.002950102702016e-05, "std": 0.032765936106443405, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.13.2.to_k.bias": { "min": -3.6502115726470947, "max": 3.285125494003296, "mean": -0.014261167496442795, "std": 0.9845166206359863, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.13.2.to_v.weight": { "min": -0.23496489226818085, "max": 0.24718151986598969, "mean": -1.8079399524140172e-05, "std": 0.041703000664711, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.13.2.to_v.bias": { "min": -0.07261228561401367, "max": 0.15409623086452484, "mean": 0.0006618116749450564, "std": 0.02513669617474079, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.13.2.to_out.0.weight": { "min": -0.26620712876319885, "max": 0.24820521473884583, "mean": -1.5344019629992545e-05, "std": 0.04014336317777634, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.13.2.to_out.0.bias": { "min": -0.18921570479869843, "max": 0.19427257776260376, "mean": -0.0012257307535037398, "std": 0.0666433721780777, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.13.3.g": { "min": 0.32903727889060974, "max": 0.9973482489585876, "mean": 0.7190757393836975, "std": 0.051972683519124985, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.13.4.ff.0.0.weight": { "min": -0.23141932487487793, "max": 0.24504587054252625, "mean": 0.0001826788648031652, "std": 0.04090685769915581, "sparsity": 0.0, "shape": [ 4096, 1024 ] }, "transformer.layers.13.4.ff.0.0.bias": { "min": -0.11396601796150208, "max": 0.01875537633895874, "mean": -0.04246020317077637, "std": 0.018833719193935394, "sparsity": 0.0, "shape": [ 4096 ] }, "transformer.layers.13.4.ff.2.weight": { "min": -0.38934653997421265, "max": 0.4067343473434448, "mean": -2.1657660909113474e-05, "std": 0.04854125902056694, "sparsity": 0.0, "shape": [ 1024, 4096 ] }, "transformer.layers.13.4.ff.2.bias": { "min": -0.6919497847557068, "max": 0.411848247051239, "mean": 0.0008590769721195102, "std": 0.06023983284831047, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.14.0.weight": { "min": -0.000941734469961375, "max": 1.0006029605865479, "mean": 0.00048819385119713843, "std": 0.02209211327135563, "sparsity": 0.0, "shape": [ 1024, 2048 ] }, "transformer.layers.14.1.g": { "min": 1.0, "max": 1.0, "mean": 1.0, "std": 0.0, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.14.2.to_q.weight": { "min": -0.031249970197677612, "max": 0.031249817460775375, "mean": -2.1022657165303826e-05, "std": 0.018035436049103737, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.14.2.to_q.bias": { "min": -0.03122086077928543, "max": 0.031233571469783783, "mean": -0.0006771883927285671, "std": 0.01782997138798237, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.14.2.to_k.weight": { "min": -0.03124987706542015, "max": 0.031249921768903732, "mean": -8.839062502374873e-06, "std": 0.01803446188569069, "sparsity": 9.5367431640625e-07, "shape": [ 1024, 1024 ] }, "transformer.layers.14.2.to_k.bias": { "min": -0.031232360750436783, "max": 0.031245984137058258, "mean": -0.0007298353011719882, "std": 0.017944591119885445, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.14.2.to_v.weight": { "min": 0.0, "max": 0.0, "mean": 0.0, "std": 0.0, "sparsity": 1.0, "shape": [ 1024, 1024 ] }, "transformer.layers.14.2.to_v.bias": { "min": 0.0, "max": 0.0, "mean": 0.0, "std": 0.0, "sparsity": 1.0, "shape": [ 1024 ] }, "transformer.layers.14.2.to_out.0.weight": { "min": 0.0, "max": 0.0, "mean": 0.0, "std": 0.0, "sparsity": 1.0, "shape": [ 1024, 1024 ] }, "transformer.layers.14.2.to_out.0.bias": { "min": -0.0003224269312340766, "max": 0.0002993023081216961, "mean": 6.5217936935368925e-06, "std": 0.0001044638265739195, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.14.3.g": { "min": 0.9996813535690308, "max": 1.0015599727630615, "mean": 1.000339150428772, "std": 0.0002295201556989923, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.14.4.ff.0.0.weight": { "min": -0.032516807317733765, "max": 0.03226118162274361, "mean": 4.161014203418745e-06, "std": 0.018049873411655426, "sparsity": 0.0, "shape": [ 4096, 1024 ] }, "transformer.layers.14.4.ff.0.0.bias": { "min": -0.031123636290431023, "max": 0.03165753185749054, "mean": 0.0003850722569040954, "std": 0.018070610240101814, "sparsity": 0.0, "shape": [ 4096 ] }, "transformer.layers.14.4.ff.2.weight": { "min": -0.0009010994690470397, "max": 0.0009490308002568781, "mean": 2.8105064302508254e-06, "std": 0.00016459461767226458, "sparsity": 0.0, "shape": [ 1024, 4096 ] }, "transformer.layers.14.4.ff.2.bias": { "min": -0.00032089874730445445, "max": 0.00031345486058853567, "mean": 6.42746908852132e-06, "std": 0.00010272208601236343, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.15.0.weight": { "min": -0.23485393822193146, "max": 0.27267447113990784, "mean": 6.709969511575764e-06, "std": 0.018812596797943115, "sparsity": 0.0, "shape": [ 1024, 2048 ] }, "transformer.layers.15.1.g": { "min": 0.32135409116744995, "max": 0.6922963857650757, "mean": 0.5815727710723877, "std": 0.045748595148324966, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.15.2.to_q.weight": { "min": -0.1818080097436905, "max": 0.19750945270061493, "mean": -1.1748516044463031e-05, "std": 0.03318887948989868, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.15.2.to_q.bias": { "min": -0.16036057472229004, "max": 0.12932586669921875, "mean": -0.0010664488654583693, "std": 0.03411008045077324, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.15.2.to_k.weight": { "min": -0.33175674080848694, "max": 0.31088003516197205, "mean": -1.0311603546142578e-05, "std": 0.0322394073009491, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.15.2.to_k.bias": { "min": -7.791174411773682, "max": 8.749550819396973, "mean": 0.09336872398853302, "std": 1.6178374290466309, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.15.2.to_v.weight": { "min": -0.23367103934288025, "max": 0.2417406141757965, "mean": 4.146722494624555e-05, "std": 0.04086144268512726, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.15.2.to_v.bias": { "min": -0.07598260790109634, "max": 0.06560970842838287, "mean": 0.0004800831666216254, "std": 0.019395504146814346, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.15.2.to_out.0.weight": { "min": -0.24560654163360596, "max": 0.23375561833381653, "mean": -2.9877701308578253e-06, "std": 0.03943600133061409, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.15.2.to_out.0.bias": { "min": -0.1627652794122696, "max": 0.16063357889652252, "mean": 0.0016337584238499403, "std": 0.06525594741106033, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.15.3.g": { "min": 0.5568895936012268, "max": 0.9421334266662598, "mean": 0.7127605080604553, "std": 0.03978221118450165, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.15.4.ff.0.0.weight": { "min": -0.22847090661525726, "max": 0.25493934750556946, "mean": -4.550522498902865e-05, "std": 0.040581200271844864, "sparsity": 0.0, "shape": [ 4096, 1024 ] }, "transformer.layers.15.4.ff.0.0.bias": { "min": -0.1344706267118454, "max": 0.022221069782972336, "mean": -0.04133939743041992, "std": 0.01835877075791359, "sparsity": 0.0, "shape": [ 4096 ] }, "transformer.layers.15.4.ff.2.weight": { "min": -0.4210115969181061, "max": 0.3920403718948364, "mean": -4.534296749625355e-06, "std": 0.047791384160518646, "sparsity": 0.0, "shape": [ 1024, 4096 ] }, "transformer.layers.15.4.ff.2.bias": { "min": -0.6062420010566711, "max": 0.6502339243888855, "mean": 0.0015842054272070527, "std": 0.05679100751876831, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.16.0.weight": { "min": -0.252038836479187, "max": 0.32106301188468933, "mean": -6.296660103544127e-06, "std": 0.019615648314356804, "sparsity": 0.0, "shape": [ 1024, 2048 ] }, "transformer.layers.16.1.g": { "min": 0.35961171984672546, "max": 0.6809778809547424, "mean": 0.5706169605255127, "std": 0.042782142758369446, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.16.2.to_q.weight": { "min": -0.22040791809558868, "max": 0.17709863185882568, "mean": -3.522756742313504e-05, "std": 0.03430448845028877, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.16.2.to_q.bias": { "min": -0.16271811723709106, "max": 0.23246890306472778, "mean": 0.0003684491675812751, "std": 0.03280302509665489, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.16.2.to_k.weight": { "min": -0.26368996500968933, "max": 0.23957668244838715, "mean": -5.283607606543228e-05, "std": 0.03390355408191681, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.16.2.to_k.bias": { "min": -4.8473591804504395, "max": 5.083388805389404, "mean": 0.04383918642997742, "std": 1.2279300689697266, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.16.2.to_v.weight": { "min": -0.24628077447414398, "max": 0.2501535415649414, "mean": 7.219994586193934e-05, "std": 0.04399203881621361, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.16.2.to_v.bias": { "min": -0.062493205070495605, "max": 0.054467517882585526, "mean": 0.0006505983183160424, "std": 0.01718413643538952, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.16.2.to_out.0.weight": { "min": -0.2860679030418396, "max": 0.27162545919418335, "mean": -4.9951679102377966e-05, "std": 0.04299019277095795, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.16.2.to_out.0.bias": { "min": -0.16042187809944153, "max": 0.1700378805398941, "mean": -0.0028904015198349953, "std": 0.05927493795752525, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.16.3.g": { "min": 0.5196736454963684, "max": 0.931270182132721, "mean": 0.7133467197418213, "std": 0.03808481991291046, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.16.4.ff.0.0.weight": { "min": -0.2380017340183258, "max": 0.24893511831760406, "mean": 0.00046494320849888027, "std": 0.04046032205224037, "sparsity": 0.0, "shape": [ 4096, 1024 ] }, "transformer.layers.16.4.ff.0.0.bias": { "min": -0.1442948430776596, "max": 0.041139233857393265, "mean": -0.03967897593975067, "std": 0.020518682897090912, "sparsity": 0.0, "shape": [ 4096 ] }, "transformer.layers.16.4.ff.2.weight": { "min": -0.5318877696990967, "max": 0.5818965435028076, "mean": 6.336260412354022e-06, "std": 0.048867613077163696, "sparsity": 0.0, "shape": [ 1024, 4096 ] }, "transformer.layers.16.4.ff.2.bias": { "min": -0.5183113813400269, "max": 0.4925517439842224, "mean": 0.0023608217015862465, "std": 0.053406503051519394, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.17.0.weight": { "min": -0.2738274037837982, "max": 0.31547796726226807, "mean": 1.8216255739389453e-06, "std": 0.02005232311785221, "sparsity": 0.0, "shape": [ 1024, 2048 ] }, "transformer.layers.17.1.g": { "min": 0.3659067749977112, "max": 0.7100387215614319, "mean": 0.5930584073066711, "std": 0.04572707787156105, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.17.2.to_q.weight": { "min": -0.21076832711696625, "max": 0.19927603006362915, "mean": 3.0815259378869087e-05, "std": 0.03487056866288185, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.17.2.to_q.bias": { "min": -0.186960831284523, "max": 0.20310287177562714, "mean": 0.0009555225260555744, "std": 0.03147275000810623, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.17.2.to_k.weight": { "min": -0.28951019048690796, "max": 0.33969932794570923, "mean": -4.744817124446854e-05, "std": 0.034591346979141235, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.17.2.to_k.bias": { "min": -3.8711647987365723, "max": 3.3820366859436035, "mean": 0.01444312371313572, "std": 0.8576698899269104, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.17.2.to_v.weight": { "min": -0.2244085818529129, "max": 0.249923974275589, "mean": -3.961446964240167e-06, "std": 0.04223531484603882, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.17.2.to_v.bias": { "min": -0.05502909794449806, "max": 0.04645157977938652, "mean": -2.0665102056227624e-05, "std": 0.01583181880414486, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.17.2.to_out.0.weight": { "min": -0.2927229106426239, "max": 0.2906007766723633, "mean": -7.488439223379828e-06, "std": 0.04195013642311096, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.17.2.to_out.0.bias": { "min": -0.12459567189216614, "max": 0.25878894329071045, "mean": -0.0032436519395560026, "std": 0.053140122443437576, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.17.3.g": { "min": 0.4563407599925995, "max": 0.8428970575332642, "mean": 0.7054145932197571, "std": 0.03490997478365898, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.17.4.ff.0.0.weight": { "min": -0.5114501714706421, "max": 0.3482079803943634, "mean": 0.00034245854476466775, "std": 0.04020575433969498, "sparsity": 0.0, "shape": [ 4096, 1024 ] }, "transformer.layers.17.4.ff.0.0.bias": { "min": -0.18575434386730194, "max": 0.03953104466199875, "mean": -0.03936902433633804, "std": 0.021325672045350075, "sparsity": 0.0, "shape": [ 4096 ] }, "transformer.layers.17.4.ff.2.weight": { "min": -0.5437595248222351, "max": 0.5556712746620178, "mean": -7.024264050414786e-05, "std": 0.05074309557676315, "sparsity": 0.0, "shape": [ 1024, 4096 ] }, "transformer.layers.17.4.ff.2.bias": { "min": -0.5109111666679382, "max": 0.6631372570991516, "mean": 0.002439212054014206, "std": 0.049490757286548615, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.18.0.weight": { "min": -0.33253294229507446, "max": 0.2652721107006073, "mean": 3.378802830411587e-06, "std": 0.019389795139431953, "sparsity": 0.0, "shape": [ 1024, 2048 ] }, "transformer.layers.18.1.g": { "min": 0.3220270276069641, "max": 0.7649413347244263, "mean": 0.6509413719177246, "std": 0.045111026614904404, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.18.2.to_q.weight": { "min": -0.2494993954896927, "max": 0.21881401538848877, "mean": -2.360827238589991e-06, "std": 0.03650495037436485, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.18.2.to_q.bias": { "min": -0.3266308009624481, "max": 0.28657323122024536, "mean": -0.0006807027384638786, "std": 0.038520634174346924, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.18.2.to_k.weight": { "min": -0.30951929092407227, "max": 0.36978626251220703, "mean": 6.48990971967578e-05, "std": 0.036245379596948624, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.18.2.to_k.bias": { "min": -4.710280895233154, "max": 5.798713684082031, "mean": 0.037927284836769104, "std": 1.4116240739822388, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.18.2.to_v.weight": { "min": -0.22114244103431702, "max": 0.20574785768985748, "mean": -7.537077181041241e-05, "std": 0.04249110445380211, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.18.2.to_v.bias": { "min": -0.07735679298639297, "max": 0.05145302414894104, "mean": -0.0009192783036269248, "std": 0.016400594264268875, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.18.2.to_out.0.weight": { "min": -0.3307357728481293, "max": 0.32934609055519104, "mean": -4.647547484637471e-06, "std": 0.042797382920980453, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.18.2.to_out.0.bias": { "min": -0.28440576791763306, "max": 0.11188910901546478, "mean": -0.0012069176882505417, "std": 0.0469915047287941, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.18.3.g": { "min": 0.4862346351146698, "max": 0.8851982355117798, "mean": 0.7373509407043457, "std": 0.03795893117785454, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.18.4.ff.0.0.weight": { "min": -0.3612706959247589, "max": 0.27453744411468506, "mean": 5.114857412991114e-05, "std": 0.04065178707242012, "sparsity": 0.0, "shape": [ 4096, 1024 ] }, "transformer.layers.18.4.ff.0.0.bias": { "min": -0.24725216627120972, "max": 0.04655319079756737, "mean": -0.03925145044922829, "std": 0.023245742544531822, "sparsity": 0.0, "shape": [ 4096 ] }, "transformer.layers.18.4.ff.2.weight": { "min": -0.625215470790863, "max": 0.5962166786193848, "mean": -5.8090816310141236e-05, "std": 0.05312598869204521, "sparsity": 0.0, "shape": [ 1024, 4096 ] }, "transformer.layers.18.4.ff.2.bias": { "min": -0.7085027694702148, "max": 0.2653276026248932, "mean": 0.0009165835799649358, "std": 0.0511946901679039, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.19.0.weight": { "min": -0.34328368306159973, "max": 0.3035609722137451, "mean": 1.4504064438369824e-07, "std": 0.019138522446155548, "sparsity": 0.0, "shape": [ 1024, 2048 ] }, "transformer.layers.19.1.g": { "min": 0.3498779833316803, "max": 0.7813707590103149, "mean": 0.6387293338775635, "std": 0.049000099301338196, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.19.2.to_q.weight": { "min": -0.20522303879261017, "max": 0.20651094615459442, "mean": -5.9693807997973636e-05, "std": 0.03769965097308159, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.19.2.to_q.bias": { "min": -0.25792619585990906, "max": 0.2676540017127991, "mean": -0.0004065552493557334, "std": 0.044568419456481934, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.19.2.to_k.weight": { "min": -0.3535814583301544, "max": 0.32190999388694763, "mean": -7.394870408461429e-06, "std": 0.037208281457424164, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.19.2.to_k.bias": { "min": -5.253505706787109, "max": 4.198240280151367, "mean": -0.026390478014945984, "std": 1.0056747198104858, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.19.2.to_v.weight": { "min": -0.2384454905986786, "max": 0.24342015385627747, "mean": -2.5527655452606268e-05, "std": 0.043215684592723846, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.19.2.to_v.bias": { "min": -0.06227009370923042, "max": 0.05663022771477699, "mean": 0.0003446021000854671, "std": 0.01414022222161293, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.19.2.to_out.0.weight": { "min": -0.43697887659072876, "max": 0.3737882673740387, "mean": 1.4649482181994244e-05, "std": 0.04412706196308136, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.19.2.to_out.0.bias": { "min": -0.09632225334644318, "max": 0.1757834255695343, "mean": -0.0006590378470718861, "std": 0.03513453155755997, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.19.3.g": { "min": 0.4219363331794739, "max": 1.0674819946289062, "mean": 0.7483711838722229, "std": 0.041829537600278854, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.19.4.ff.0.0.weight": { "min": -0.26578643918037415, "max": 0.29607900977134705, "mean": -7.925635145511478e-05, "std": 0.04081210494041443, "sparsity": 0.0, "shape": [ 4096, 1024 ] }, "transformer.layers.19.4.ff.0.0.bias": { "min": -0.18497370183467865, "max": 0.04346155747771263, "mean": -0.03679885342717171, "std": 0.025566671043634415, "sparsity": 0.0, "shape": [ 4096 ] }, "transformer.layers.19.4.ff.2.weight": { "min": -0.45727846026420593, "max": 0.48611682653427124, "mean": 4.68605212518014e-05, "std": 0.05422008037567139, "sparsity": 0.0, "shape": [ 1024, 4096 ] }, "transformer.layers.19.4.ff.2.bias": { "min": -0.285878986120224, "max": 0.5506833791732788, "mean": -0.0008855935884639621, "std": 0.047791752964258194, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.20.0.weight": { "min": -0.2927459478378296, "max": 0.32270148396492004, "mean": 6.155781647976255e-06, "std": 0.019972333684563637, "sparsity": 0.0, "shape": [ 1024, 2048 ] }, "transformer.layers.20.1.g": { "min": 0.29097816348075867, "max": 0.7588945627212524, "mean": 0.6507570743560791, "std": 0.05195188894867897, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.20.2.to_q.weight": { "min": -0.24343979358673096, "max": 0.2611932158470154, "mean": -5.595570200966904e-06, "std": 0.039616428315639496, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.20.2.to_q.bias": { "min": -0.2672193646430969, "max": 0.19968828558921814, "mean": -0.0008741158526390791, "std": 0.051719244569540024, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.20.2.to_k.weight": { "min": -0.2713148593902588, "max": 0.25280529260635376, "mean": 4.686854481406044e-06, "std": 0.03871333599090576, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.20.2.to_k.bias": { "min": -12.945391654968262, "max": 15.922587394714355, "mean": 0.0331900492310524, "std": 1.9867922067642212, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.20.2.to_v.weight": { "min": -0.20660938322544098, "max": 0.22584253549575806, "mean": -7.262543658725917e-05, "std": 0.04055970162153244, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.20.2.to_v.bias": { "min": -0.06933361291885376, "max": 0.06314393132925034, "mean": 0.00014905043644830585, "std": 0.014740395359694958, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.20.2.to_out.0.weight": { "min": -0.46516552567481995, "max": 0.3203747570514679, "mean": 1.989086922549177e-05, "std": 0.04059458523988724, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.20.2.to_out.0.bias": { "min": -0.06398216634988785, "max": 0.11521662026643753, "mean": 0.0011892176698893309, "std": 0.02469474822282791, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.20.3.g": { "min": 0.37489306926727295, "max": 0.9301723837852478, "mean": 0.7509260177612305, "std": 0.04003360494971275, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.20.4.ff.0.0.weight": { "min": -0.27877017855644226, "max": 0.27262061834335327, "mean": -0.00016865786164999008, "std": 0.0410030372440815, "sparsity": 0.0, "shape": [ 4096, 1024 ] }, "transformer.layers.20.4.ff.0.0.bias": { "min": -0.19846785068511963, "max": 0.05112157389521599, "mean": -0.032006848603487015, "std": 0.02506233938038349, "sparsity": 0.0, "shape": [ 4096 ] }, "transformer.layers.20.4.ff.2.weight": { "min": -0.6571894884109497, "max": 0.5354637503623962, "mean": -4.8520763812121004e-05, "std": 0.05285634472966194, "sparsity": 0.0, "shape": [ 1024, 4096 ] }, "transformer.layers.20.4.ff.2.bias": { "min": -0.19253292679786682, "max": 0.5813104510307312, "mean": -0.0005173450335860252, "std": 0.04104470834136009, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.21.0.weight": { "min": -0.41767504811286926, "max": 0.3719256818294525, "mean": 6.585116807400482e-06, "std": 0.02162640169262886, "sparsity": 0.0, "shape": [ 1024, 2048 ] }, "transformer.layers.21.1.g": { "min": 0.21444188058376312, "max": 0.7454288601875305, "mean": 0.6494399309158325, "std": 0.054196760058403015, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.21.2.to_q.weight": { "min": -0.20942556858062744, "max": 0.19570672512054443, "mean": 4.021516360808164e-05, "std": 0.03946828842163086, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.21.2.to_q.bias": { "min": -0.32898303866386414, "max": 0.2592002749443054, "mean": -0.0032279789447784424, "std": 0.05622360482811928, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.21.2.to_k.weight": { "min": -0.2054453343153, "max": 0.2543545663356781, "mean": 5.45132061233744e-05, "std": 0.03857067599892616, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.21.2.to_k.bias": { "min": -6.233641624450684, "max": 6.921432971954346, "mean": 0.04828529804944992, "std": 1.3836402893066406, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.21.2.to_v.weight": { "min": -0.20949925482273102, "max": 0.2304454892873764, "mean": -4.72849160360056e-06, "std": 0.041318491101264954, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.21.2.to_v.bias": { "min": -0.04375026375055313, "max": 0.03585176169872284, "mean": -5.88857801631093e-07, "std": 0.012790623120963573, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.21.2.to_out.0.weight": { "min": -0.39803647994995117, "max": 0.34512725472450256, "mean": -5.491710908245295e-05, "std": 0.042394764721393585, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.21.2.to_out.0.bias": { "min": -0.054978147149086, "max": 0.06269973516464233, "mean": 0.0003556903393473476, "std": 0.018663441762328148, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.21.3.g": { "min": 0.35058680176734924, "max": 1.043295979499817, "mean": 0.789494514465332, "std": 0.04858649522066116, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.21.4.ff.0.0.weight": { "min": -0.33317434787750244, "max": 0.3864516317844391, "mean": -0.00016881646297406405, "std": 0.041488684713840485, "sparsity": 0.0, "shape": [ 4096, 1024 ] }, "transformer.layers.21.4.ff.0.0.bias": { "min": -0.15732650458812714, "max": 0.058728814125061035, "mean": -0.03181058540940285, "std": 0.025098087266087532, "sparsity": 0.0, "shape": [ 4096 ] }, "transformer.layers.21.4.ff.2.weight": { "min": -0.6958801746368408, "max": 0.46852678060531616, "mean": -8.982194412965328e-05, "std": 0.05180330574512482, "sparsity": 0.0, "shape": [ 1024, 4096 ] }, "transformer.layers.21.4.ff.2.bias": { "min": -0.24772712588310242, "max": 0.32808512449264526, "mean": -0.0002515119267627597, "std": 0.04140802100300789, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.22.0.weight": { "min": -0.28731903433799744, "max": 0.3503708243370056, "mean": -2.625113665999379e-06, "std": 0.024243580177426338, "sparsity": 0.0, "shape": [ 1024, 2048 ] }, "transformer.layers.22.1.g": { "min": 0.19668713212013245, "max": 0.7778334617614746, "mean": 0.670162558555603, "std": 0.05853449925780296, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.22.2.to_q.weight": { "min": -0.2283114343881607, "max": 0.23055444657802582, "mean": -2.0571733330143616e-05, "std": 0.04044181853532791, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.22.2.to_q.bias": { "min": -0.2195570170879364, "max": 0.24048519134521484, "mean": 0.000782210670877248, "std": 0.055770643055438995, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.22.2.to_k.weight": { "min": -0.21605147421360016, "max": 0.22674262523651123, "mean": -7.179281237768009e-05, "std": 0.03937681019306183, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.22.2.to_k.bias": { "min": -8.892273902893066, "max": 9.054671287536621, "mean": -0.0012077325955033302, "std": 1.846124529838562, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.22.2.to_v.weight": { "min": -0.2689066231250763, "max": 0.2583616375923157, "mean": 4.3370266212150455e-05, "std": 0.03841203823685646, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.22.2.to_v.bias": { "min": -0.05771247297525406, "max": 0.05783558264374733, "mean": 0.00035597707028500736, "std": 0.014716549776494503, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.22.2.to_out.0.weight": { "min": -0.2647928297519684, "max": 0.28871840238571167, "mean": -6.220719660632312e-05, "std": 0.0390787236392498, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.22.2.to_out.0.bias": { "min": -0.04365166649222374, "max": 0.037368953227996826, "mean": -8.94215190783143e-05, "std": 0.013351045548915863, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.22.3.g": { "min": 0.33930352330207825, "max": 1.090523362159729, "mean": 0.8638416528701782, "std": 0.06374476104974747, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.22.4.ff.0.0.weight": { "min": -0.4229956567287445, "max": 0.41935035586357117, "mean": 0.00031358242267742753, "std": 0.04351169988512993, "sparsity": 0.0, "shape": [ 4096, 1024 ] }, "transformer.layers.22.4.ff.0.0.bias": { "min": -0.2143605649471283, "max": 0.17033977806568146, "mean": -0.029430482536554337, "std": 0.031879011541604996, "sparsity": 0.0, "shape": [ 4096 ] }, "transformer.layers.22.4.ff.2.weight": { "min": -0.5980925559997559, "max": 0.5593904852867126, "mean": -0.0001523983955848962, "std": 0.05345866456627846, "sparsity": 0.0, "shape": [ 1024, 4096 ] }, "transformer.layers.22.4.ff.2.bias": { "min": -0.17843037843704224, "max": 0.3764672875404358, "mean": 0.0013608136214315891, "std": 0.037283699959516525, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.23.0.weight": { "min": -0.3941720128059387, "max": 0.3687548339366913, "mean": 3.7372221413534135e-05, "std": 0.02862183377146721, "sparsity": 0.0, "shape": [ 1024, 2048 ] }, "transformer.layers.23.1.g": { "min": 0.2906048893928528, "max": 0.825853168964386, "mean": 0.7055732607841492, "std": 0.0677838996052742, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.23.2.to_q.weight": { "min": -0.9263502359390259, "max": 1.027148962020874, "mean": -2.6785823138197884e-05, "std": 0.04763893038034439, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.23.2.to_q.bias": { "min": -0.8774253129959106, "max": 0.8142860531806946, "mean": -0.0003061135357711464, "std": 0.09545911848545074, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.23.2.to_k.weight": { "min": -0.2697736918926239, "max": 0.24071107804775238, "mean": -2.2601629098062404e-05, "std": 0.038958579301834106, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.23.2.to_k.bias": { "min": -23.70609474182129, "max": 22.81615447998047, "mean": -0.09178254753351212, "std": 4.064568042755127, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.23.2.to_v.weight": { "min": -0.22739385068416595, "max": 0.24493008852005005, "mean": -2.535741987230722e-05, "std": 0.03864453360438347, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.23.2.to_v.bias": { "min": -0.06026393920183182, "max": 0.045535702258348465, "mean": -0.00013921607751399279, "std": 0.014681815169751644, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.23.2.to_out.0.weight": { "min": -0.3383011817932129, "max": 0.3741171360015869, "mean": 6.997803211561404e-06, "std": 0.040823448449373245, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.23.2.to_out.0.bias": { "min": -0.046280112117528915, "max": 0.19523115456104279, "mean": 0.00027006896561942995, "std": 0.01355893723666668, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.23.3.g": { "min": 0.3735462725162506, "max": 1.1277151107788086, "mean": 0.8900589942932129, "std": 0.06382670253515244, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.23.4.ff.0.0.weight": { "min": -0.4478131830692291, "max": 0.5424441695213318, "mean": 2.4745060727582313e-05, "std": 0.04557563737034798, "sparsity": 0.0, "shape": [ 4096, 1024 ] }, "transformer.layers.23.4.ff.0.0.bias": { "min": -0.22360379993915558, "max": 0.08794356882572174, "mean": -0.03199389576911926, "std": 0.03773387894034386, "sparsity": 0.0, "shape": [ 4096 ] }, "transformer.layers.23.4.ff.2.weight": { "min": -0.7249262928962708, "max": 0.6877928376197815, "mean": 3.6950204957975075e-05, "std": 0.051789939403533936, "sparsity": 0.0, "shape": [ 1024, 4096 ] }, "transformer.layers.23.4.ff.2.bias": { "min": -0.17425872385501862, "max": 0.21810372173786163, "mean": 3.0209601391106844e-05, "std": 0.03174462914466858, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.24.0.weight": { "min": -0.3392157554626465, "max": 0.3738991320133209, "mean": 4.299447755329311e-05, "std": 0.03414613753557205, "sparsity": 0.0, "shape": [ 1024, 2048 ] }, "transformer.layers.24.1.g": { "min": 0.3178655207157135, "max": 1.2844390869140625, "mean": 0.6014401912689209, "std": 0.08323848247528076, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.24.2.to_q.weight": { "min": -0.2828904390335083, "max": 0.260010302066803, "mean": -3.007857230841182e-06, "std": 0.03598371520638466, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.24.2.to_q.bias": { "min": -0.2351931631565094, "max": 0.20519772171974182, "mean": 0.00022795653785578907, "std": 0.055979955941438675, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.24.2.to_k.weight": { "min": -0.43529582023620605, "max": 0.32459068298339844, "mean": 2.450653482810594e-05, "std": 0.03413282707333565, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.24.2.to_k.bias": { "min": -5.542441368103027, "max": 7.307634353637695, "mean": -0.007349876686930656, "std": 0.6985355019569397, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.24.2.to_v.weight": { "min": -0.3433660864830017, "max": 0.3625560700893402, "mean": 0.00010314527025911957, "std": 0.04783623665571213, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.24.2.to_v.bias": { "min": -0.07354722917079926, "max": 0.060343291610479355, "mean": 0.0009371445048600435, "std": 0.014936422929167747, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.24.2.to_out.0.weight": { "min": -0.25582820177078247, "max": 0.286111980676651, "mean": 4.655210432247259e-06, "std": 0.04156283661723137, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.24.2.to_out.0.bias": { "min": -0.05514800176024437, "max": 0.06263813376426697, "mean": 0.0001386886287946254, "std": 0.007160879671573639, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.24.3.g": { "min": 0.4938517212867737, "max": 1.2188584804534912, "mean": 1.0133963823318481, "std": 0.11724550276994705, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.24.4.ff.0.0.weight": { "min": -1.093719720840454, "max": 1.0471616983413696, "mean": -4.925714529235847e-05, "std": 0.05241731181740761, "sparsity": 0.0, "shape": [ 4096, 1024 ] }, "transformer.layers.24.4.ff.0.0.bias": { "min": -0.2243891805410385, "max": 0.172992542386055, "mean": -0.027224872261285782, "std": 0.03628592565655708, "sparsity": 0.0, "shape": [ 4096 ] }, "transformer.layers.24.4.ff.2.weight": { "min": -0.8836102485656738, "max": 0.9222370386123657, "mean": -0.0001438588951714337, "std": 0.053294114768505096, "sparsity": 0.0, "shape": [ 1024, 4096 ] }, "transformer.layers.24.4.ff.2.bias": { "min": -0.17069175839424133, "max": 0.37931114435195923, "mean": 0.003359442111104727, "std": 0.03984633460640907, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.25.0.weight": { "min": -0.777143120765686, "max": 0.7232267260551453, "mean": 1.830433029681444e-05, "std": 0.0461735762655735, "sparsity": 0.0, "shape": [ 1024, 2048 ] }, "transformer.layers.25.1.g": { "min": 0.3386678695678711, "max": 1.4252641201019287, "mean": 0.9481973648071289, "std": 0.20639142394065857, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.25.2.to_q.weight": { "min": -1.746235728263855, "max": 1.7046191692352295, "mean": 0.00022743589943274856, "std": 0.1587381213903427, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.25.2.to_q.bias": { "min": -1.1972129344940186, "max": 1.0979515314102173, "mean": -0.00952577032148838, "std": 0.2035541981458664, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.25.2.to_k.weight": { "min": -0.4209991693496704, "max": 0.42664653062820435, "mean": 6.461775046773255e-05, "std": 0.04803095757961273, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.25.2.to_k.bias": { "min": -19.71938133239746, "max": 19.514814376831055, "mean": -0.24804288148880005, "std": 4.770266532897949, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.25.2.to_v.weight": { "min": -0.32366812229156494, "max": 0.43827319145202637, "mean": -1.2008969861199148e-05, "std": 0.04616396129131317, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.25.2.to_v.bias": { "min": -0.03389401733875275, "max": 0.03695628046989441, "mean": 0.0006402541184797883, "std": 0.012914549559354782, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.25.2.to_out.0.weight": { "min": -0.7030304074287415, "max": 0.6659538745880127, "mean": 4.320529478718527e-05, "std": 0.05788206309080124, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.25.2.to_out.0.bias": { "min": -0.07218055427074432, "max": 0.0675114244222641, "mean": -0.0001346912613371387, "std": 0.012894386425614357, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.25.3.g": { "min": 0.3805386424064636, "max": 1.3893085718154907, "mean": 1.0665242671966553, "std": 0.21952925622463226, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.25.4.ff.0.0.weight": { "min": -0.6161316633224487, "max": 0.717426061630249, "mean": 0.00011223374167457223, "std": 0.0580313578248024, "sparsity": 0.0, "shape": [ 4096, 1024 ] }, "transformer.layers.25.4.ff.0.0.bias": { "min": -0.21904653310775757, "max": 0.22452397644519806, "mean": 0.006222008261829615, "std": 0.049658045172691345, "sparsity": 0.0, "shape": [ 4096 ] }, "transformer.layers.25.4.ff.2.weight": { "min": -0.6296318769454956, "max": 0.8893842101097107, "mean": 1.2104990673833527e-05, "std": 0.02354114130139351, "sparsity": 0.0, "shape": [ 1024, 4096 ] }, "transformer.layers.25.4.ff.2.bias": { "min": -0.5061390995979309, "max": 0.473175585269928, "mean": -0.003011696506291628, "std": 0.06919368356466293, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.norm_out.g": { "min": 0.5380294322967529, "max": 1.1777888536453247, "mean": 0.7825304865837097, "std": 0.09833591431379318, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.proj_out.weight": { "min": -0.26662442088127136, "max": 0.21249151229858398, "mean": -0.00022446915681939572, "std": 0.054007817059755325, "sparsity": 0.0, "shape": [ 100, 1024 ] }, "transformer.proj_out.bias": { "min": -0.23786024749279022, "max": 0.014854340814054012, "mean": -0.04389730468392372, "std": 0.03425038233399391, "sparsity": 0.0, "shape": [ 100 ] } } }