{ "layer_types": { "transformer": 391 }, "parameter_counts": { "transformer.time_embed.time_mlp.0.weight": 262144, "transformer.time_embed.time_mlp.0.bias": 1024, "transformer.time_embed.time_mlp.2.weight": 1048576, "transformer.time_embed.time_mlp.2.bias": 1024, "transformer.text_embed.text_embed.weight": 254600, "transformer.input_embed.proj.weight": 307200, "transformer.input_embed.proj.bias": 1024, "transformer.input_embed.conv_pos_embed.conv1d.0.weight": 2031616, "transformer.input_embed.conv_pos_embed.conv1d.0.bias": 1024, "transformer.input_embed.conv_pos_embed.conv1d.2.weight": 2031616, "transformer.input_embed.conv_pos_embed.conv1d.2.bias": 1024, "transformer.layers.0.1.g": 1024, "transformer.layers.0.2.to_q.weight": 1048576, "transformer.layers.0.2.to_q.bias": 1024, "transformer.layers.0.2.to_k.weight": 1048576, "transformer.layers.0.2.to_k.bias": 1024, "transformer.layers.0.2.to_v.weight": 1048576, "transformer.layers.0.2.to_v.bias": 1024, "transformer.layers.0.2.to_out.0.weight": 1048576, "transformer.layers.0.2.to_out.0.bias": 1024, "transformer.layers.0.3.g": 1024, "transformer.layers.0.4.ff.0.0.weight": 4194304, "transformer.layers.0.4.ff.0.0.bias": 4096, "transformer.layers.0.4.ff.2.weight": 4194304, "transformer.layers.0.4.ff.2.bias": 1024, "transformer.layers.1.1.g": 1024, "transformer.layers.1.2.to_q.weight": 1048576, "transformer.layers.1.2.to_q.bias": 1024, "transformer.layers.1.2.to_k.weight": 1048576, "transformer.layers.1.2.to_k.bias": 1024, "transformer.layers.1.2.to_v.weight": 1048576, "transformer.layers.1.2.to_v.bias": 1024, "transformer.layers.1.2.to_out.0.weight": 1048576, "transformer.layers.1.2.to_out.0.bias": 1024, "transformer.layers.1.3.g": 1024, "transformer.layers.1.4.ff.0.0.weight": 4194304, "transformer.layers.1.4.ff.0.0.bias": 4096, "transformer.layers.1.4.ff.2.weight": 4194304, "transformer.layers.1.4.ff.2.bias": 1024, "transformer.layers.2.1.g": 1024, "transformer.layers.2.2.to_q.weight": 1048576, "transformer.layers.2.2.to_q.bias": 1024, "transformer.layers.2.2.to_k.weight": 1048576, "transformer.layers.2.2.to_k.bias": 1024, "transformer.layers.2.2.to_v.weight": 1048576, "transformer.layers.2.2.to_v.bias": 1024, "transformer.layers.2.2.to_out.0.weight": 1048576, "transformer.layers.2.2.to_out.0.bias": 1024, "transformer.layers.2.3.g": 1024, "transformer.layers.2.4.ff.0.0.weight": 4194304, "transformer.layers.2.4.ff.0.0.bias": 4096, "transformer.layers.2.4.ff.2.weight": 4194304, "transformer.layers.2.4.ff.2.bias": 1024, "transformer.layers.3.1.g": 1024, "transformer.layers.3.2.to_q.weight": 1048576, "transformer.layers.3.2.to_q.bias": 1024, "transformer.layers.3.2.to_k.weight": 1048576, "transformer.layers.3.2.to_k.bias": 1024, "transformer.layers.3.2.to_v.weight": 1048576, "transformer.layers.3.2.to_v.bias": 1024, "transformer.layers.3.2.to_out.0.weight": 1048576, "transformer.layers.3.2.to_out.0.bias": 1024, "transformer.layers.3.3.g": 1024, "transformer.layers.3.4.ff.0.0.weight": 4194304, "transformer.layers.3.4.ff.0.0.bias": 4096, "transformer.layers.3.4.ff.2.weight": 4194304, "transformer.layers.3.4.ff.2.bias": 1024, "transformer.layers.4.1.g": 1024, "transformer.layers.4.2.to_q.weight": 1048576, "transformer.layers.4.2.to_q.bias": 1024, "transformer.layers.4.2.to_k.weight": 1048576, "transformer.layers.4.2.to_k.bias": 1024, "transformer.layers.4.2.to_v.weight": 1048576, "transformer.layers.4.2.to_v.bias": 1024, "transformer.layers.4.2.to_out.0.weight": 1048576, "transformer.layers.4.2.to_out.0.bias": 1024, "transformer.layers.4.3.g": 1024, "transformer.layers.4.4.ff.0.0.weight": 4194304, "transformer.layers.4.4.ff.0.0.bias": 4096, "transformer.layers.4.4.ff.2.weight": 4194304, "transformer.layers.4.4.ff.2.bias": 1024, "transformer.layers.5.1.g": 1024, "transformer.layers.5.2.to_q.weight": 1048576, "transformer.layers.5.2.to_q.bias": 1024, "transformer.layers.5.2.to_k.weight": 1048576, "transformer.layers.5.2.to_k.bias": 1024, "transformer.layers.5.2.to_v.weight": 1048576, "transformer.layers.5.2.to_v.bias": 1024, "transformer.layers.5.2.to_out.0.weight": 1048576, "transformer.layers.5.2.to_out.0.bias": 1024, "transformer.layers.5.3.g": 1024, "transformer.layers.5.4.ff.0.0.weight": 4194304, "transformer.layers.5.4.ff.0.0.bias": 4096, "transformer.layers.5.4.ff.2.weight": 4194304, "transformer.layers.5.4.ff.2.bias": 1024, "transformer.layers.6.1.g": 1024, "transformer.layers.6.2.to_q.weight": 1048576, "transformer.layers.6.2.to_q.bias": 1024, "transformer.layers.6.2.to_k.weight": 1048576, "transformer.layers.6.2.to_k.bias": 1024, "transformer.layers.6.2.to_v.weight": 1048576, "transformer.layers.6.2.to_v.bias": 1024, "transformer.layers.6.2.to_out.0.weight": 1048576, "transformer.layers.6.2.to_out.0.bias": 1024, "transformer.layers.6.3.g": 1024, "transformer.layers.6.4.ff.0.0.weight": 4194304, "transformer.layers.6.4.ff.0.0.bias": 4096, "transformer.layers.6.4.ff.2.weight": 4194304, "transformer.layers.6.4.ff.2.bias": 1024, "transformer.layers.7.1.g": 1024, "transformer.layers.7.2.to_q.weight": 1048576, "transformer.layers.7.2.to_q.bias": 1024, "transformer.layers.7.2.to_k.weight": 1048576, "transformer.layers.7.2.to_k.bias": 1024, "transformer.layers.7.2.to_v.weight": 1048576, "transformer.layers.7.2.to_v.bias": 1024, "transformer.layers.7.2.to_out.0.weight": 1048576, "transformer.layers.7.2.to_out.0.bias": 1024, "transformer.layers.7.3.g": 1024, "transformer.layers.7.4.ff.0.0.weight": 4194304, "transformer.layers.7.4.ff.0.0.bias": 4096, "transformer.layers.7.4.ff.2.weight": 4194304, "transformer.layers.7.4.ff.2.bias": 1024, "transformer.layers.8.1.g": 1024, "transformer.layers.8.2.to_q.weight": 1048576, "transformer.layers.8.2.to_q.bias": 1024, "transformer.layers.8.2.to_k.weight": 1048576, "transformer.layers.8.2.to_k.bias": 1024, "transformer.layers.8.2.to_v.weight": 1048576, "transformer.layers.8.2.to_v.bias": 1024, "transformer.layers.8.2.to_out.0.weight": 1048576, "transformer.layers.8.2.to_out.0.bias": 1024, "transformer.layers.8.3.g": 1024, "transformer.layers.8.4.ff.0.0.weight": 4194304, "transformer.layers.8.4.ff.0.0.bias": 4096, "transformer.layers.8.4.ff.2.weight": 4194304, "transformer.layers.8.4.ff.2.bias": 1024, "transformer.layers.9.1.g": 1024, "transformer.layers.9.2.to_q.weight": 1048576, "transformer.layers.9.2.to_q.bias": 1024, "transformer.layers.9.2.to_k.weight": 1048576, "transformer.layers.9.2.to_k.bias": 1024, "transformer.layers.9.2.to_v.weight": 1048576, "transformer.layers.9.2.to_v.bias": 1024, "transformer.layers.9.2.to_out.0.weight": 1048576, "transformer.layers.9.2.to_out.0.bias": 1024, "transformer.layers.9.3.g": 1024, "transformer.layers.9.4.ff.0.0.weight": 4194304, "transformer.layers.9.4.ff.0.0.bias": 4096, "transformer.layers.9.4.ff.2.weight": 4194304, "transformer.layers.9.4.ff.2.bias": 1024, "transformer.layers.10.1.g": 1024, "transformer.layers.10.2.to_q.weight": 1048576, "transformer.layers.10.2.to_q.bias": 1024, "transformer.layers.10.2.to_k.weight": 1048576, "transformer.layers.10.2.to_k.bias": 1024, "transformer.layers.10.2.to_v.weight": 1048576, "transformer.layers.10.2.to_v.bias": 1024, "transformer.layers.10.2.to_out.0.weight": 1048576, "transformer.layers.10.2.to_out.0.bias": 1024, "transformer.layers.10.3.g": 1024, "transformer.layers.10.4.ff.0.0.weight": 4194304, "transformer.layers.10.4.ff.0.0.bias": 4096, "transformer.layers.10.4.ff.2.weight": 4194304, "transformer.layers.10.4.ff.2.bias": 1024, "transformer.layers.11.1.g": 1024, "transformer.layers.11.2.to_q.weight": 1048576, "transformer.layers.11.2.to_q.bias": 1024, "transformer.layers.11.2.to_k.weight": 1048576, "transformer.layers.11.2.to_k.bias": 1024, "transformer.layers.11.2.to_v.weight": 1048576, "transformer.layers.11.2.to_v.bias": 1024, "transformer.layers.11.2.to_out.0.weight": 1048576, "transformer.layers.11.2.to_out.0.bias": 1024, "transformer.layers.11.3.g": 1024, "transformer.layers.11.4.ff.0.0.weight": 4194304, "transformer.layers.11.4.ff.0.0.bias": 4096, "transformer.layers.11.4.ff.2.weight": 4194304, "transformer.layers.11.4.ff.2.bias": 1024, "transformer.layers.12.1.g": 1024, "transformer.layers.12.2.to_q.weight": 1048576, "transformer.layers.12.2.to_q.bias": 1024, "transformer.layers.12.2.to_k.weight": 1048576, "transformer.layers.12.2.to_k.bias": 1024, "transformer.layers.12.2.to_v.weight": 1048576, "transformer.layers.12.2.to_v.bias": 1024, "transformer.layers.12.2.to_out.0.weight": 1048576, "transformer.layers.12.2.to_out.0.bias": 1024, "transformer.layers.12.3.g": 1024, "transformer.layers.12.4.ff.0.0.weight": 4194304, "transformer.layers.12.4.ff.0.0.bias": 4096, "transformer.layers.12.4.ff.2.weight": 4194304, "transformer.layers.12.4.ff.2.bias": 1024, "transformer.layers.13.0.weight": 2097152, "transformer.layers.13.1.g": 1024, "transformer.layers.13.2.to_q.weight": 1048576, "transformer.layers.13.2.to_q.bias": 1024, "transformer.layers.13.2.to_k.weight": 1048576, "transformer.layers.13.2.to_k.bias": 1024, "transformer.layers.13.2.to_v.weight": 1048576, "transformer.layers.13.2.to_v.bias": 1024, "transformer.layers.13.2.to_out.0.weight": 1048576, "transformer.layers.13.2.to_out.0.bias": 1024, "transformer.layers.13.3.g": 1024, "transformer.layers.13.4.ff.0.0.weight": 4194304, "transformer.layers.13.4.ff.0.0.bias": 4096, "transformer.layers.13.4.ff.2.weight": 4194304, "transformer.layers.13.4.ff.2.bias": 1024, "transformer.layers.14.0.weight": 2097152, "transformer.layers.14.1.g": 1024, "transformer.layers.14.2.to_q.weight": 1048576, "transformer.layers.14.2.to_q.bias": 1024, "transformer.layers.14.2.to_k.weight": 1048576, "transformer.layers.14.2.to_k.bias": 1024, "transformer.layers.14.2.to_v.weight": 1048576, "transformer.layers.14.2.to_v.bias": 1024, "transformer.layers.14.2.to_out.0.weight": 1048576, "transformer.layers.14.2.to_out.0.bias": 1024, "transformer.layers.14.3.g": 1024, "transformer.layers.14.4.ff.0.0.weight": 4194304, "transformer.layers.14.4.ff.0.0.bias": 4096, "transformer.layers.14.4.ff.2.weight": 4194304, "transformer.layers.14.4.ff.2.bias": 1024, "transformer.layers.15.0.weight": 2097152, "transformer.layers.15.1.g": 1024, "transformer.layers.15.2.to_q.weight": 1048576, "transformer.layers.15.2.to_q.bias": 1024, "transformer.layers.15.2.to_k.weight": 1048576, "transformer.layers.15.2.to_k.bias": 1024, "transformer.layers.15.2.to_v.weight": 1048576, "transformer.layers.15.2.to_v.bias": 1024, "transformer.layers.15.2.to_out.0.weight": 1048576, "transformer.layers.15.2.to_out.0.bias": 1024, "transformer.layers.15.3.g": 1024, "transformer.layers.15.4.ff.0.0.weight": 4194304, "transformer.layers.15.4.ff.0.0.bias": 4096, "transformer.layers.15.4.ff.2.weight": 4194304, "transformer.layers.15.4.ff.2.bias": 1024, "transformer.layers.16.0.weight": 2097152, "transformer.layers.16.1.g": 1024, "transformer.layers.16.2.to_q.weight": 1048576, "transformer.layers.16.2.to_q.bias": 1024, "transformer.layers.16.2.to_k.weight": 1048576, "transformer.layers.16.2.to_k.bias": 1024, "transformer.layers.16.2.to_v.weight": 1048576, "transformer.layers.16.2.to_v.bias": 1024, "transformer.layers.16.2.to_out.0.weight": 1048576, "transformer.layers.16.2.to_out.0.bias": 1024, "transformer.layers.16.3.g": 1024, "transformer.layers.16.4.ff.0.0.weight": 4194304, "transformer.layers.16.4.ff.0.0.bias": 4096, "transformer.layers.16.4.ff.2.weight": 4194304, "transformer.layers.16.4.ff.2.bias": 1024, "transformer.layers.17.0.weight": 2097152, "transformer.layers.17.1.g": 1024, "transformer.layers.17.2.to_q.weight": 1048576, "transformer.layers.17.2.to_q.bias": 1024, "transformer.layers.17.2.to_k.weight": 1048576, "transformer.layers.17.2.to_k.bias": 1024, "transformer.layers.17.2.to_v.weight": 1048576, "transformer.layers.17.2.to_v.bias": 1024, "transformer.layers.17.2.to_out.0.weight": 1048576, "transformer.layers.17.2.to_out.0.bias": 1024, "transformer.layers.17.3.g": 1024, "transformer.layers.17.4.ff.0.0.weight": 4194304, "transformer.layers.17.4.ff.0.0.bias": 4096, "transformer.layers.17.4.ff.2.weight": 4194304, "transformer.layers.17.4.ff.2.bias": 1024, "transformer.layers.18.0.weight": 2097152, "transformer.layers.18.1.g": 1024, "transformer.layers.18.2.to_q.weight": 1048576, "transformer.layers.18.2.to_q.bias": 1024, "transformer.layers.18.2.to_k.weight": 1048576, "transformer.layers.18.2.to_k.bias": 1024, "transformer.layers.18.2.to_v.weight": 1048576, "transformer.layers.18.2.to_v.bias": 1024, "transformer.layers.18.2.to_out.0.weight": 1048576, "transformer.layers.18.2.to_out.0.bias": 1024, "transformer.layers.18.3.g": 1024, "transformer.layers.18.4.ff.0.0.weight": 4194304, "transformer.layers.18.4.ff.0.0.bias": 4096, "transformer.layers.18.4.ff.2.weight": 4194304, "transformer.layers.18.4.ff.2.bias": 1024, "transformer.layers.19.0.weight": 2097152, "transformer.layers.19.1.g": 1024, "transformer.layers.19.2.to_q.weight": 1048576, "transformer.layers.19.2.to_q.bias": 1024, "transformer.layers.19.2.to_k.weight": 1048576, "transformer.layers.19.2.to_k.bias": 1024, "transformer.layers.19.2.to_v.weight": 1048576, "transformer.layers.19.2.to_v.bias": 1024, "transformer.layers.19.2.to_out.0.weight": 1048576, "transformer.layers.19.2.to_out.0.bias": 1024, "transformer.layers.19.3.g": 1024, "transformer.layers.19.4.ff.0.0.weight": 4194304, "transformer.layers.19.4.ff.0.0.bias": 4096, "transformer.layers.19.4.ff.2.weight": 4194304, "transformer.layers.19.4.ff.2.bias": 1024, "transformer.layers.20.0.weight": 2097152, "transformer.layers.20.1.g": 1024, "transformer.layers.20.2.to_q.weight": 1048576, "transformer.layers.20.2.to_q.bias": 1024, "transformer.layers.20.2.to_k.weight": 1048576, "transformer.layers.20.2.to_k.bias": 1024, "transformer.layers.20.2.to_v.weight": 1048576, "transformer.layers.20.2.to_v.bias": 1024, "transformer.layers.20.2.to_out.0.weight": 1048576, "transformer.layers.20.2.to_out.0.bias": 1024, "transformer.layers.20.3.g": 1024, "transformer.layers.20.4.ff.0.0.weight": 4194304, "transformer.layers.20.4.ff.0.0.bias": 4096, "transformer.layers.20.4.ff.2.weight": 4194304, "transformer.layers.20.4.ff.2.bias": 1024, "transformer.layers.21.0.weight": 2097152, "transformer.layers.21.1.g": 1024, "transformer.layers.21.2.to_q.weight": 1048576, "transformer.layers.21.2.to_q.bias": 1024, "transformer.layers.21.2.to_k.weight": 1048576, "transformer.layers.21.2.to_k.bias": 1024, "transformer.layers.21.2.to_v.weight": 1048576, "transformer.layers.21.2.to_v.bias": 1024, "transformer.layers.21.2.to_out.0.weight": 1048576, "transformer.layers.21.2.to_out.0.bias": 1024, "transformer.layers.21.3.g": 1024, "transformer.layers.21.4.ff.0.0.weight": 4194304, "transformer.layers.21.4.ff.0.0.bias": 4096, "transformer.layers.21.4.ff.2.weight": 4194304, "transformer.layers.21.4.ff.2.bias": 1024, "transformer.layers.22.0.weight": 2097152, "transformer.layers.22.1.g": 1024, "transformer.layers.22.2.to_q.weight": 1048576, "transformer.layers.22.2.to_q.bias": 1024, "transformer.layers.22.2.to_k.weight": 1048576, "transformer.layers.22.2.to_k.bias": 1024, "transformer.layers.22.2.to_v.weight": 1048576, "transformer.layers.22.2.to_v.bias": 1024, "transformer.layers.22.2.to_out.0.weight": 1048576, "transformer.layers.22.2.to_out.0.bias": 1024, "transformer.layers.22.3.g": 1024, "transformer.layers.22.4.ff.0.0.weight": 4194304, "transformer.layers.22.4.ff.0.0.bias": 4096, "transformer.layers.22.4.ff.2.weight": 4194304, "transformer.layers.22.4.ff.2.bias": 1024, "transformer.layers.23.0.weight": 2097152, "transformer.layers.23.1.g": 1024, "transformer.layers.23.2.to_q.weight": 1048576, "transformer.layers.23.2.to_q.bias": 1024, "transformer.layers.23.2.to_k.weight": 1048576, "transformer.layers.23.2.to_k.bias": 1024, "transformer.layers.23.2.to_v.weight": 1048576, "transformer.layers.23.2.to_v.bias": 1024, "transformer.layers.23.2.to_out.0.weight": 1048576, "transformer.layers.23.2.to_out.0.bias": 1024, "transformer.layers.23.3.g": 1024, "transformer.layers.23.4.ff.0.0.weight": 4194304, "transformer.layers.23.4.ff.0.0.bias": 4096, "transformer.layers.23.4.ff.2.weight": 4194304, "transformer.layers.23.4.ff.2.bias": 1024, "transformer.layers.24.0.weight": 2097152, "transformer.layers.24.1.g": 1024, "transformer.layers.24.2.to_q.weight": 1048576, "transformer.layers.24.2.to_q.bias": 1024, "transformer.layers.24.2.to_k.weight": 1048576, "transformer.layers.24.2.to_k.bias": 1024, "transformer.layers.24.2.to_v.weight": 1048576, "transformer.layers.24.2.to_v.bias": 1024, "transformer.layers.24.2.to_out.0.weight": 1048576, "transformer.layers.24.2.to_out.0.bias": 1024, "transformer.layers.24.3.g": 1024, "transformer.layers.24.4.ff.0.0.weight": 4194304, "transformer.layers.24.4.ff.0.0.bias": 4096, "transformer.layers.24.4.ff.2.weight": 4194304, "transformer.layers.24.4.ff.2.bias": 1024, "transformer.layers.25.0.weight": 2097152, "transformer.layers.25.1.g": 1024, "transformer.layers.25.2.to_q.weight": 1048576, "transformer.layers.25.2.to_q.bias": 1024, "transformer.layers.25.2.to_k.weight": 1048576, "transformer.layers.25.2.to_k.bias": 1024, "transformer.layers.25.2.to_v.weight": 1048576, "transformer.layers.25.2.to_v.bias": 1024, "transformer.layers.25.2.to_out.0.weight": 1048576, "transformer.layers.25.2.to_out.0.bias": 1024, "transformer.layers.25.3.g": 1024, "transformer.layers.25.4.ff.0.0.weight": 4194304, "transformer.layers.25.4.ff.0.0.bias": 4096, "transformer.layers.25.4.ff.2.weight": 4194304, "transformer.layers.25.4.ff.2.bias": 1024, "transformer.norm_out.g": 1024, "transformer.proj_out.weight": 102400, "transformer.proj_out.bias": 100 }, "important_layers": [ "transformer.time_embed.time_mlp.0.weight", "transformer.time_embed.time_mlp.2.weight", "transformer.text_embed.text_embed.weight", "transformer.input_embed.proj.weight", "transformer.input_embed.conv_pos_embed.conv1d.0.weight", "transformer.input_embed.conv_pos_embed.conv1d.2.weight", "transformer.layers.0.2.to_q.weight", "transformer.layers.0.2.to_k.weight", "transformer.layers.0.2.to_v.weight", "transformer.layers.0.2.to_out.0.weight", "transformer.layers.0.4.ff.0.0.weight", "transformer.layers.0.4.ff.2.weight", "transformer.layers.1.2.to_q.weight", "transformer.layers.1.2.to_k.weight", "transformer.layers.1.2.to_v.weight", "transformer.layers.1.2.to_out.0.weight", "transformer.layers.1.4.ff.0.0.weight", "transformer.layers.1.4.ff.2.weight", "transformer.layers.2.2.to_q.weight", "transformer.layers.2.2.to_k.weight", "transformer.layers.2.2.to_v.weight", "transformer.layers.2.2.to_out.0.weight", "transformer.layers.2.4.ff.0.0.weight", "transformer.layers.2.4.ff.2.weight", "transformer.layers.3.2.to_q.weight", "transformer.layers.3.2.to_k.weight", "transformer.layers.3.2.to_v.weight", "transformer.layers.3.2.to_out.0.weight", "transformer.layers.3.4.ff.0.0.weight", "transformer.layers.3.4.ff.2.weight", "transformer.layers.4.2.to_q.weight", "transformer.layers.4.2.to_k.weight", "transformer.layers.4.2.to_v.weight", "transformer.layers.4.2.to_out.0.weight", "transformer.layers.4.4.ff.0.0.weight", "transformer.layers.4.4.ff.2.weight", "transformer.layers.5.2.to_q.weight", "transformer.layers.5.2.to_k.weight", "transformer.layers.5.2.to_v.weight", "transformer.layers.5.2.to_out.0.weight", "transformer.layers.5.4.ff.0.0.weight", "transformer.layers.5.4.ff.2.weight", "transformer.layers.6.2.to_q.weight", "transformer.layers.6.2.to_k.weight", "transformer.layers.6.2.to_v.weight", "transformer.layers.6.2.to_out.0.weight", "transformer.layers.6.4.ff.0.0.weight", "transformer.layers.6.4.ff.2.weight", "transformer.layers.7.2.to_q.weight", "transformer.layers.7.2.to_k.weight", "transformer.layers.7.2.to_v.weight", "transformer.layers.7.2.to_out.0.weight", "transformer.layers.7.4.ff.0.0.weight", "transformer.layers.7.4.ff.2.weight", "transformer.layers.8.4.ff.0.0.weight", "transformer.layers.8.4.ff.2.weight", "transformer.layers.9.4.ff.0.0.weight", "transformer.layers.9.4.ff.2.weight", "transformer.layers.10.4.ff.0.0.weight", "transformer.layers.10.4.ff.2.weight", "transformer.layers.11.4.ff.0.0.weight", "transformer.layers.11.4.ff.2.weight", "transformer.layers.12.4.ff.0.0.weight", "transformer.layers.12.4.ff.2.weight", "transformer.layers.13.0.weight", "transformer.layers.13.4.ff.0.0.weight", "transformer.layers.13.4.ff.2.weight", "transformer.layers.14.0.weight", "transformer.layers.14.4.ff.0.0.weight", "transformer.layers.14.4.ff.2.weight", "transformer.layers.15.0.weight", "transformer.layers.15.4.ff.0.0.weight", "transformer.layers.15.4.ff.2.weight", "transformer.layers.16.4.ff.0.0.weight", "transformer.layers.16.4.ff.2.weight", "transformer.layers.17.4.ff.0.0.weight", "transformer.layers.17.4.ff.2.weight", "transformer.layers.18.4.ff.0.0.weight", "transformer.layers.18.4.ff.2.weight", "transformer.layers.19.4.ff.0.0.weight", "transformer.layers.19.4.ff.2.weight", "transformer.layers.20.4.ff.0.0.weight", "transformer.layers.20.4.ff.2.weight", "transformer.layers.21.4.ff.0.0.weight", "transformer.layers.21.4.ff.2.weight", "transformer.layers.22.4.ff.0.0.weight", "transformer.layers.22.4.ff.2.weight", "transformer.layers.23.4.ff.0.0.weight", "transformer.layers.23.4.ff.2.weight", "transformer.layers.24.4.ff.0.0.weight", "transformer.layers.24.4.ff.2.weight", "transformer.layers.25.4.ff.0.0.weight", "transformer.layers.25.4.ff.2.weight" ], "bottleneck_layers": [], "recommendations": { "focus_layers": [ "transformer.time_embed.time_mlp.0.weight", "transformer.time_embed.time_mlp.2.weight", "transformer.text_embed.text_embed.weight", "transformer.input_embed.proj.weight", "transformer.input_embed.conv_pos_embed.conv1d.0.weight", "transformer.input_embed.conv_pos_embed.conv1d.2.weight", "transformer.layers.0.2.to_q.weight", "transformer.layers.0.2.to_k.weight", "transformer.layers.0.2.to_v.weight", "transformer.layers.0.2.to_out.0.weight", "transformer.layers.0.4.ff.0.0.weight", "transformer.layers.0.4.ff.2.weight", "transformer.layers.1.2.to_q.weight", "transformer.layers.1.2.to_k.weight", "transformer.layers.1.2.to_v.weight", "transformer.layers.1.2.to_out.0.weight", "transformer.layers.1.4.ff.0.0.weight", "transformer.layers.1.4.ff.2.weight", "transformer.layers.2.2.to_q.weight", "transformer.layers.2.2.to_k.weight", "transformer.layers.2.2.to_v.weight", "transformer.layers.2.2.to_out.0.weight", "transformer.layers.2.4.ff.0.0.weight", "transformer.layers.2.4.ff.2.weight", "transformer.layers.3.2.to_q.weight", "transformer.layers.3.2.to_k.weight", "transformer.layers.3.2.to_v.weight", "transformer.layers.3.2.to_out.0.weight", "transformer.layers.3.4.ff.0.0.weight", "transformer.layers.3.4.ff.2.weight", "transformer.layers.4.2.to_q.weight", "transformer.layers.4.2.to_k.weight", "transformer.layers.4.2.to_v.weight", "transformer.layers.4.2.to_out.0.weight", "transformer.layers.4.4.ff.0.0.weight", "transformer.layers.4.4.ff.2.weight", "transformer.layers.5.2.to_q.weight", "transformer.layers.5.2.to_k.weight", "transformer.layers.5.2.to_v.weight", "transformer.layers.5.2.to_out.0.weight", "transformer.layers.5.4.ff.0.0.weight", "transformer.layers.5.4.ff.2.weight", "transformer.layers.6.2.to_q.weight", "transformer.layers.6.2.to_k.weight", "transformer.layers.6.2.to_v.weight", "transformer.layers.6.2.to_out.0.weight", "transformer.layers.6.4.ff.0.0.weight", "transformer.layers.6.4.ff.2.weight", "transformer.layers.7.2.to_q.weight", "transformer.layers.7.2.to_k.weight", "transformer.layers.7.2.to_v.weight", "transformer.layers.7.2.to_out.0.weight", "transformer.layers.7.4.ff.0.0.weight", "transformer.layers.7.4.ff.2.weight", "transformer.layers.8.4.ff.0.0.weight", "transformer.layers.8.4.ff.2.weight", "transformer.layers.9.4.ff.0.0.weight", "transformer.layers.9.4.ff.2.weight", "transformer.layers.10.4.ff.0.0.weight", "transformer.layers.10.4.ff.2.weight", "transformer.layers.11.4.ff.0.0.weight", "transformer.layers.11.4.ff.2.weight", "transformer.layers.12.4.ff.0.0.weight", "transformer.layers.12.4.ff.2.weight", "transformer.layers.13.0.weight", "transformer.layers.13.4.ff.0.0.weight", "transformer.layers.13.4.ff.2.weight", "transformer.layers.14.0.weight", "transformer.layers.14.4.ff.0.0.weight", "transformer.layers.14.4.ff.2.weight", "transformer.layers.15.0.weight", "transformer.layers.15.4.ff.0.0.weight", "transformer.layers.15.4.ff.2.weight", "transformer.layers.16.4.ff.0.0.weight", "transformer.layers.16.4.ff.2.weight", "transformer.layers.17.4.ff.0.0.weight", "transformer.layers.17.4.ff.2.weight", "transformer.layers.18.4.ff.0.0.weight", "transformer.layers.18.4.ff.2.weight", "transformer.layers.19.4.ff.0.0.weight", "transformer.layers.19.4.ff.2.weight", "transformer.layers.20.4.ff.0.0.weight", "transformer.layers.20.4.ff.2.weight", "transformer.layers.21.4.ff.0.0.weight", "transformer.layers.21.4.ff.2.weight", "transformer.layers.22.4.ff.0.0.weight", "transformer.layers.22.4.ff.2.weight", "transformer.layers.23.4.ff.0.0.weight", "transformer.layers.23.4.ff.2.weight", "transformer.layers.24.4.ff.0.0.weight", "transformer.layers.24.4.ff.2.weight", "transformer.layers.25.4.ff.0.0.weight", "transformer.layers.25.4.ff.2.weight" ] }, "total_parameters": 391, "total_elements": 360755948, "param_ranges": { "transformer.time_embed.time_mlp.0.weight": { "min": -0.43091416358947754, "max": 0.2991102933883667, "mean": -0.002557656727731228, "std": 0.04255230724811554, "sparsity": 0.0, "shape": [ 1024, 256 ] }, "transformer.time_embed.time_mlp.0.bias": { "min": -0.06317874044179916, "max": 0.10845368355512619, "mean": 0.0006046494818292558, "std": 0.0341438427567482, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.time_embed.time_mlp.2.weight": { "min": -0.4125913977622986, "max": 0.8363389372825623, "mean": -0.0002094925002893433, "std": 0.024107541888952255, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.time_embed.time_mlp.2.bias": { "min": -0.11634448170661926, "max": 0.32392504811286926, "mean": -0.0009387563331983984, "std": 0.019654380157589912, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.text_embed.text_embed.weight": { "min": -2.8076894283294678, "max": 2.8856873512268066, "mean": -0.0003593244473449886, "std": 0.6153794527053833, "sparsity": 0.0, "shape": [ 2546, 100 ] }, "transformer.input_embed.proj.weight": { "min": -0.2804395258426666, "max": 0.38235825300216675, "mean": 0.00042111962102353573, "std": 0.0427500456571579, "sparsity": 0.0, "shape": [ 1024, 300 ] }, "transformer.input_embed.proj.bias": { "min": -0.22397927939891815, "max": 0.21124881505966187, "mean": -0.004504885524511337, "std": 0.04102449491620064, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.input_embed.conv_pos_embed.conv1d.0.weight": { "min": -0.42797791957855225, "max": 0.4753724932670593, "mean": 3.1681217933510197e-06, "std": 0.024508841335773468, "sparsity": 0.0, "shape": [ 1024, 64, 31 ] }, "transformer.input_embed.conv_pos_embed.conv1d.0.bias": { "min": -0.3278864026069641, "max": 0.15815186500549316, "mean": -0.046754755079746246, "std": 0.05172203853726387, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.input_embed.conv_pos_embed.conv1d.2.weight": { "min": -0.4108750522136688, "max": 0.3548462688922882, "mean": -0.0001276329276151955, "std": 0.023600950837135315, "sparsity": 0.0, "shape": [ 1024, 64, 31 ] }, "transformer.input_embed.conv_pos_embed.conv1d.2.bias": { "min": -0.231490820646286, "max": 0.26459917426109314, "mean": -0.029202936217188835, "std": 0.049504559487104416, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.0.1.g": { "min": 0.2546604871749878, "max": 0.8254969120025635, "mean": 0.5257646441459656, "std": 0.08148879557847977, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.0.2.to_q.weight": { "min": -0.2975306808948517, "max": 0.26634442806243896, "mean": -0.0004239020636305213, "std": 0.032103944569826126, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.0.2.to_q.bias": { "min": -0.093165822327137, "max": 0.12537634372711182, "mean": 0.0006500760791823268, "std": 0.0257789958268404, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.0.2.to_k.weight": { "min": -0.2912229299545288, "max": 0.2824551463127136, "mean": -7.682169962208718e-05, "std": 0.03093571960926056, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.0.2.to_k.bias": { "min": -5.9252495765686035, "max": 5.839654445648193, "mean": -0.00940663367509842, "std": 1.2986583709716797, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.0.2.to_v.weight": { "min": -0.4255436658859253, "max": 0.34462970495224, "mean": 9.765196591615677e-05, "std": 0.02995290234684944, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.0.2.to_v.bias": { "min": -0.028961628675460815, "max": 0.027653951197862625, "mean": -0.000311878917273134, "std": 0.012572262436151505, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.0.2.to_out.0.weight": { "min": -0.4547809660434723, "max": 0.44922640919685364, "mean": 2.2741787688573822e-05, "std": 0.023854725062847137, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.0.2.to_out.0.bias": { "min": -0.08907536417245865, "max": 0.09154797345399857, "mean": 0.0022746319882571697, "std": 0.019537169486284256, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.0.3.g": { "min": 0.2665960192680359, "max": 1.0631530284881592, "mean": 0.5315366387367249, "std": 0.10529287159442902, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.0.4.ff.0.0.weight": { "min": -0.5752094984054565, "max": 0.6091693043708801, "mean": -0.0004337065329309553, "std": 0.038595084100961685, "sparsity": 0.0, "shape": [ 4096, 1024 ] }, "transformer.layers.0.4.ff.0.0.bias": { "min": -0.18266847729682922, "max": 0.04574590548872948, "mean": -0.02949558012187481, "std": 0.042705073952674866, "sparsity": 0.0, "shape": [ 4096 ] }, "transformer.layers.0.4.ff.2.weight": { "min": -1.168283462524414, "max": 1.6358791589736938, "mean": 0.0003184601664543152, "std": 0.027693841606378555, "sparsity": 0.0, "shape": [ 1024, 4096 ] }, "transformer.layers.0.4.ff.2.bias": { "min": -0.1632407307624817, "max": 0.20662632584571838, "mean": -0.02112644352018833, "std": 0.027983704581856728, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.1.1.g": { "min": 0.2244432270526886, "max": 0.8492330312728882, "mean": 0.4877929091453552, "std": 0.07575991004705429, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.1.2.to_q.weight": { "min": -0.25644662976264954, "max": 0.30648505687713623, "mean": -9.105999197345227e-06, "std": 0.03347046673297882, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.1.2.to_q.bias": { "min": -0.09590143710374832, "max": 0.11091545224189758, "mean": 5.9943689848296344e-05, "std": 0.02701094001531601, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.1.2.to_k.weight": { "min": -0.29843541979789734, "max": 0.29746681451797485, "mean": 5.037898154114373e-05, "std": 0.0325385183095932, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.1.2.to_k.bias": { "min": -5.186855792999268, "max": 5.106731414794922, "mean": -0.014725911431014538, "std": 1.1609561443328857, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.1.2.to_v.weight": { "min": -0.34537965059280396, "max": 0.3438728153705597, "mean": 7.886411185609177e-05, "std": 0.030058259144425392, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.1.2.to_v.bias": { "min": -0.036315590143203735, "max": 0.033395010977983475, "mean": -0.00014420351362787187, "std": 0.013025550171732903, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.1.2.to_out.0.weight": { "min": -0.3161202371120453, "max": 0.37616145610809326, "mean": -2.1655154341715388e-05, "std": 0.02405548468232155, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.1.2.to_out.0.bias": { "min": -0.10574664920568466, "max": 0.12242550402879715, "mean": -0.0019548372365534306, "std": 0.028876660391688347, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.1.3.g": { "min": 0.31179988384246826, "max": 1.1284958124160767, "mean": 0.6666731238365173, "std": 0.09859278053045273, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.1.4.ff.0.0.weight": { "min": -0.8728909492492676, "max": 0.6278397440910339, "mean": 0.0016749973874539137, "std": 0.047438185662031174, "sparsity": 0.0, "shape": [ 4096, 1024 ] }, "transformer.layers.1.4.ff.0.0.bias": { "min": -0.2722160518169403, "max": 0.0340891033411026, "mean": -0.046644046902656555, "std": 0.04069075360894203, "sparsity": 0.0, "shape": [ 4096 ] }, "transformer.layers.1.4.ff.2.weight": { "min": -0.922055184841156, "max": 0.9654105305671692, "mean": 0.0010205680737271905, "std": 0.04070195555686951, "sparsity": 0.0, "shape": [ 1024, 4096 ] }, "transformer.layers.1.4.ff.2.bias": { "min": -0.14518415927886963, "max": 0.07515987008810043, "mean": -0.009094657376408577, "std": 0.025729060173034668, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.2.1.g": { "min": 0.2397412657737732, "max": 0.7171911001205444, "mean": 0.447447270154953, "std": 0.05987730622291565, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.2.2.to_q.weight": { "min": -0.2741525173187256, "max": 0.29877936840057373, "mean": 8.61497210280504e-06, "std": 0.03547372668981552, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.2.2.to_q.bias": { "min": -0.11957156658172607, "max": 0.11899449676275253, "mean": 0.0007509939605370164, "std": 0.0276488047093153, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.2.2.to_k.weight": { "min": -0.2823837697505951, "max": 0.28084659576416016, "mean": -7.657262904103845e-05, "std": 0.035102009773254395, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.2.2.to_k.bias": { "min": -2.5205748081207275, "max": 2.532623291015625, "mean": 0.02687813714146614, "std": 0.5879213809967041, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.2.2.to_v.weight": { "min": -0.2220122367143631, "max": 0.27260157465934753, "mean": 2.5499884941382334e-06, "std": 0.030731454491615295, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.2.2.to_v.bias": { "min": -0.03331878036260605, "max": 0.031287048012018204, "mean": 0.00011721440387191251, "std": 0.01239620428532362, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.2.2.to_out.0.weight": { "min": -0.2359972894191742, "max": 0.23261798918247223, "mean": 5.7136268878821284e-05, "std": 0.025697365403175354, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.2.2.to_out.0.bias": { "min": -0.13661594688892365, "max": 0.12854568660259247, "mean": -0.005501019302755594, "std": 0.03999658301472664, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.2.3.g": { "min": 0.3546392619609833, "max": 1.180222511291504, "mean": 0.7107274532318115, "std": 0.10418680310249329, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.2.4.ff.0.0.weight": { "min": -0.6183957457542419, "max": 0.5562719106674194, "mean": 0.001160319778136909, "std": 0.04611416533589363, "sparsity": 0.0, "shape": [ 4096, 1024 ] }, "transformer.layers.2.4.ff.0.0.bias": { "min": -0.19019058346748352, "max": 0.024931631982326508, "mean": -0.034878939390182495, "std": 0.028703488409519196, "sparsity": 0.0, "shape": [ 4096 ] }, "transformer.layers.2.4.ff.2.weight": { "min": -1.1339737176895142, "max": 0.9729978442192078, "mean": 0.00035909697180613875, "std": 0.04234269633889198, "sparsity": 0.0, "shape": [ 1024, 4096 ] }, "transformer.layers.2.4.ff.2.bias": { "min": -0.6004759073257446, "max": 0.06302264332771301, "mean": -0.004885237663984299, "std": 0.028683220967650414, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.3.1.g": { "min": 0.37538695335388184, "max": 0.9469302892684937, "mean": 0.5929263234138489, "std": 0.0680219903588295, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.3.2.to_q.weight": { "min": -0.3926527798175812, "max": 0.37037163972854614, "mean": 7.004380313446745e-05, "std": 0.03718654066324234, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.3.2.to_q.bias": { "min": -0.11952866613864899, "max": 0.1371433585882187, "mean": 0.0009209888521581888, "std": 0.029237791895866394, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.3.2.to_k.weight": { "min": -0.6214983463287354, "max": 0.5109242796897888, "mean": 1.5226184586936142e-05, "std": 0.036439333111047745, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.3.2.to_k.bias": { "min": -8.222587585449219, "max": 8.827320098876953, "mean": -0.10952811688184738, "std": 1.7043956518173218, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.3.2.to_v.weight": { "min": -0.2775035798549652, "max": 0.24042560160160065, "mean": 5.222904292168096e-05, "std": 0.03261308744549751, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.3.2.to_v.bias": { "min": -0.05175856128334999, "max": 0.03964223712682724, "mean": 9.375870286021382e-05, "std": 0.012972756288945675, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.3.2.to_out.0.weight": { "min": -0.23131398856639862, "max": 0.2357378751039505, "mean": -2.203516305598896e-05, "std": 0.02938969060778618, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.3.2.to_out.0.bias": { "min": -0.2051505148410797, "max": 0.10573741793632507, "mean": -0.0040251207537949085, "std": 0.032664697617292404, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.3.3.g": { "min": 0.3397069573402405, "max": 1.01918625831604, "mean": 0.7008247375488281, "std": 0.0969780907034874, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.3.4.ff.0.0.weight": { "min": -0.5670483708381653, "max": 0.8365305662155151, "mean": 0.00041504879482090473, "std": 0.042294830083847046, "sparsity": 0.0, "shape": [ 4096, 1024 ] }, "transformer.layers.3.4.ff.0.0.bias": { "min": -0.2130415141582489, "max": 0.029987983405590057, "mean": -0.03220636397600174, "std": 0.02657567895948887, "sparsity": 0.0, "shape": [ 4096 ] }, "transformer.layers.3.4.ff.2.weight": { "min": -0.7582250833511353, "max": 0.7219672799110413, "mean": -1.576655267854221e-05, "std": 0.03683546185493469, "sparsity": 0.0, "shape": [ 1024, 4096 ] }, "transformer.layers.3.4.ff.2.bias": { "min": -0.26458415389060974, "max": 0.10674209892749786, "mean": -0.003017352893948555, "std": 0.02890385128557682, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.4.1.g": { "min": 0.28402721881866455, "max": 0.6998150944709778, "mean": 0.49963071942329407, "std": 0.04700654000043869, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.4.2.to_q.weight": { "min": -0.27952155470848083, "max": 0.23467987775802612, "mean": -0.00011085892765549943, "std": 0.038757603615522385, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.4.2.to_q.bias": { "min": -0.15429016947746277, "max": 0.12700684368610382, "mean": -0.002232399070635438, "std": 0.033386100083589554, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.4.2.to_k.weight": { "min": -0.41612547636032104, "max": 0.6611561179161072, "mean": -1.8461763829691336e-05, "std": 0.03909667953848839, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.4.2.to_k.bias": { "min": -4.2564592361450195, "max": 4.743135929107666, "mean": -0.020397484302520752, "std": 1.0097577571868896, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.4.2.to_v.weight": { "min": -0.2459408938884735, "max": 0.2083207219839096, "mean": 4.4360454921843484e-05, "std": 0.03396270051598549, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.4.2.to_v.bias": { "min": -0.03462521731853485, "max": 0.045053571462631226, "mean": -2.1719199139624834e-05, "std": 0.012641450390219688, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.4.2.to_out.0.weight": { "min": -0.20202401280403137, "max": 0.20743757486343384, "mean": -2.9260227165650576e-05, "std": 0.031020890921354294, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.4.2.to_out.0.bias": { "min": -0.20072369277477264, "max": 0.11369979381561279, "mean": -0.002900277031585574, "std": 0.03456325829029083, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.4.3.g": { "min": 0.3669256269931793, "max": 1.064845323562622, "mean": 0.6706051230430603, "std": 0.06665434688329697, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.4.4.ff.0.0.weight": { "min": -0.4000990390777588, "max": 0.5037862062454224, "mean": -3.870507498504594e-05, "std": 0.04113040864467621, "sparsity": 0.0, "shape": [ 4096, 1024 ] }, "transformer.layers.4.4.ff.0.0.bias": { "min": -0.12917247414588928, "max": 0.026963019743561745, "mean": -0.030557911843061447, "std": 0.021937619894742966, "sparsity": 0.0, "shape": [ 4096 ] }, "transformer.layers.4.4.ff.2.weight": { "min": -0.4511619806289673, "max": 0.4353387653827667, "mean": 7.546078268205747e-05, "std": 0.03489077836275101, "sparsity": 0.0, "shape": [ 1024, 4096 ] }, "transformer.layers.4.4.ff.2.bias": { "min": -0.26869964599609375, "max": 0.07339140772819519, "mean": -0.0010946399997919798, "std": 0.023160062730312347, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.5.1.g": { "min": 0.2875079810619354, "max": 0.6899884343147278, "mean": 0.5247476696968079, "std": 0.04796215519309044, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.5.2.to_q.weight": { "min": -0.22366264462471008, "max": 0.2245350182056427, "mean": 1.589955536474008e-05, "std": 0.038949232548475266, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.5.2.to_q.bias": { "min": -0.13696447014808655, "max": 0.10982562601566315, "mean": 0.0002473338390700519, "std": 0.029272515326738358, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.5.2.to_k.weight": { "min": -0.37620943784713745, "max": 0.4390593469142914, "mean": -9.372964996146038e-06, "std": 0.039287250488996506, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.5.2.to_k.bias": { "min": -3.8626632690429688, "max": 5.021180629730225, "mean": 0.009756950661540031, "std": 0.8471038937568665, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.5.2.to_v.weight": { "min": -0.2235114425420761, "max": 0.2212144434452057, "mean": -3.48434696206823e-07, "std": 0.03441031649708748, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.5.2.to_v.bias": { "min": -0.04396739602088928, "max": 0.03608814626932144, "mean": -0.00025925497175194323, "std": 0.012080671265721321, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.5.2.to_out.0.weight": { "min": -0.2138509899377823, "max": 0.18955761194229126, "mean": -1.6947185940807685e-05, "std": 0.03153672814369202, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.5.2.to_out.0.bias": { "min": -0.18172238767147064, "max": 0.12127514183521271, "mean": -0.0023971181362867355, "std": 0.04130159318447113, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.5.3.g": { "min": 0.42289772629737854, "max": 0.9483197927474976, "mean": 0.6628358364105225, "std": 0.05716627463698387, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.5.4.ff.0.0.weight": { "min": -0.37180185317993164, "max": 0.47763875126838684, "mean": -8.19972192402929e-05, "std": 0.040889617055654526, "sparsity": 0.0, "shape": [ 4096, 1024 ] }, "transformer.layers.5.4.ff.0.0.bias": { "min": -0.209408238530159, "max": 0.027359697967767715, "mean": -0.0302574522793293, "std": 0.021417709067463875, "sparsity": 0.0, "shape": [ 4096 ] }, "transformer.layers.5.4.ff.2.weight": { "min": -0.3422113060951233, "max": 0.7372819185256958, "mean": 8.242652984336019e-05, "std": 0.034766409546136856, "sparsity": 0.0, "shape": [ 1024, 4096 ] }, "transformer.layers.5.4.ff.2.bias": { "min": -0.2412174493074417, "max": 0.05068235844373703, "mean": -0.0011914315400645137, "std": 0.020485328510403633, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.6.1.g": { "min": 0.30587607622146606, "max": 0.6579968333244324, "mean": 0.5253006219863892, "std": 0.0464390330016613, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.6.2.to_q.weight": { "min": -0.30547264218330383, "max": 0.21810249984264374, "mean": 6.997188756940886e-05, "std": 0.039497073739767075, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.6.2.to_q.bias": { "min": -0.14979463815689087, "max": 0.13157697021961212, "mean": 0.00032728962833061814, "std": 0.030529892072081566, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.6.2.to_k.weight": { "min": -0.25832319259643555, "max": 0.20298458635807037, "mean": 3.122862472082488e-05, "std": 0.039488088339567184, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.6.2.to_k.bias": { "min": -2.3464906215667725, "max": 2.3862874507904053, "mean": -0.0262940414249897, "std": 0.45072564482688904, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.6.2.to_v.weight": { "min": -0.18955294787883759, "max": 0.211393803358078, "mean": 3.7051289837108925e-05, "std": 0.03479388728737831, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.6.2.to_v.bias": { "min": -0.03182046860456467, "max": 0.03580700233578682, "mean": -0.0001974685292225331, "std": 0.012292041443288326, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.6.2.to_out.0.weight": { "min": -0.18930117785930634, "max": 0.17112135887145996, "mean": -6.836307875346392e-05, "std": 0.03217054903507233, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.6.2.to_out.0.bias": { "min": -0.14002393186092377, "max": 0.1378386914730072, "mean": -0.0025169737637043, "std": 0.05131695047020912, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.6.3.g": { "min": 0.4669981598854065, "max": 0.9623145461082458, "mean": 0.669116199016571, "std": 0.053326528519392014, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.6.4.ff.0.0.weight": { "min": -0.324962854385376, "max": 0.3098026514053345, "mean": -9.876448530121706e-07, "std": 0.0409456230700016, "sparsity": 0.0, "shape": [ 4096, 1024 ] }, "transformer.layers.6.4.ff.0.0.bias": { "min": -0.12541106343269348, "max": 0.025640888139605522, "mean": -0.030711790546774864, "std": 0.019869431853294373, "sparsity": 0.0, "shape": [ 4096 ] }, "transformer.layers.6.4.ff.2.weight": { "min": -0.44164079427719116, "max": 0.4474758803844452, "mean": 9.588097600499168e-05, "std": 0.03511932119727135, "sparsity": 0.0, "shape": [ 1024, 4096 ] }, "transformer.layers.6.4.ff.2.bias": { "min": -0.2256106585264206, "max": 0.052044421434402466, "mean": -0.0011865352280437946, "std": 0.018494844436645508, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.7.1.g": { "min": 0.33912554383277893, "max": 0.7450283169746399, "mean": 0.558834433555603, "std": 0.041677191853523254, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.7.2.to_q.weight": { "min": -0.27382639050483704, "max": 0.27962929010391235, "mean": 2.034128556260839e-05, "std": 0.0410577729344368, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.7.2.to_q.bias": { "min": -0.13741885125637054, "max": 0.14038565754890442, "mean": 0.0004929338465444744, "std": 0.02668425627052784, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.7.2.to_k.weight": { "min": -0.49240002036094666, "max": 0.35733160376548767, "mean": 8.901266846805811e-05, "std": 0.04069547727704048, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.7.2.to_k.bias": { "min": -2.3072962760925293, "max": 1.7529240846633911, "mean": -0.021147169172763824, "std": 0.5008938312530518, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.7.2.to_v.weight": { "min": -0.21894769370555878, "max": 0.19816064834594727, "mean": -4.0161168726626784e-05, "std": 0.03423343971371651, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.7.2.to_v.bias": { "min": -0.04133184999227524, "max": 0.03901350870728493, "mean": -0.00013613827468361706, "std": 0.012887353077530861, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.7.2.to_out.0.weight": { "min": -0.17847225069999695, "max": 0.1837986409664154, "mean": 4.7998124500736594e-05, "std": 0.031556759029626846, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.7.2.to_out.0.bias": { "min": -0.180707648396492, "max": 0.18469232320785522, "mean": -0.0022159582003951073, "std": 0.05485893413424492, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.7.3.g": { "min": 0.4741988480091095, "max": 1.0330065488815308, "mean": 0.6454803347587585, "std": 0.05105094239115715, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.7.4.ff.0.0.weight": { "min": -0.2723560929298401, "max": 0.3096334636211395, "mean": 0.00011242127220612019, "std": 0.040681805461645126, "sparsity": 0.0, "shape": [ 4096, 1024 ] }, "transformer.layers.7.4.ff.0.0.bias": { "min": -0.10577475279569626, "max": 0.026752889156341553, "mean": -0.029537281021475792, "std": 0.01797310821712017, "sparsity": 0.0, "shape": [ 4096 ] }, "transformer.layers.7.4.ff.2.weight": { "min": -0.3403210937976837, "max": 0.33086487650871277, "mean": 5.282106576487422e-05, "std": 0.034412968903779984, "sparsity": 0.0, "shape": [ 1024, 4096 ] }, "transformer.layers.7.4.ff.2.bias": { "min": -0.18259213864803314, "max": 0.04268056899309158, "mean": -0.0010635886574164033, "std": 0.017230909317731857, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.8.1.g": { "min": 0.32514795660972595, "max": 0.6914159655570984, "mean": 0.5113943219184875, "std": 0.03739636018872261, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.8.2.to_q.weight": { "min": -0.2348308116197586, "max": 0.22631730139255524, "mean": -3.621048017521389e-05, "std": 0.0391756109893322, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.8.2.to_q.bias": { "min": -0.11563856154680252, "max": 0.13239268958568573, "mean": 0.00015192970749922097, "std": 0.029222996905446053, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.8.2.to_k.weight": { "min": -0.35409149527549744, "max": 0.2863385081291199, "mean": 6.707018656015862e-06, "std": 0.03924466669559479, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.8.2.to_k.bias": { "min": -4.1504130363464355, "max": 3.5592541694641113, "mean": -0.011647488921880722, "std": 0.6845048069953918, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.8.2.to_v.weight": { "min": -0.21134592592716217, "max": 0.21000461280345917, "mean": 3.47579552908428e-05, "std": 0.03448459133505821, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.8.2.to_v.bias": { "min": -0.036000702530145645, "max": 0.04817511513829231, "mean": 0.0007898924523033202, "std": 0.012873834930360317, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.8.2.to_out.0.weight": { "min": -0.2113579511642456, "max": 0.19389942288398743, "mean": -1.0706971806939691e-06, "std": 0.0316954106092453, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.8.2.to_out.0.bias": { "min": -0.1872350424528122, "max": 0.1779821664094925, "mean": -0.002844380447641015, "std": 0.058656178414821625, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.8.3.g": { "min": 0.4746103286743164, "max": 1.0489076375961304, "mean": 0.6516687870025635, "std": 0.05057830363512039, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.8.4.ff.0.0.weight": { "min": -0.24878337979316711, "max": 0.3296516239643097, "mean": 0.00018073963292408735, "std": 0.04057016968727112, "sparsity": 0.0, "shape": [ 4096, 1024 ] }, "transformer.layers.8.4.ff.0.0.bias": { "min": -0.12595486640930176, "max": 0.02493392489850521, "mean": -0.030515050515532494, "std": 0.01764742285013199, "sparsity": 0.0, "shape": [ 4096 ] }, "transformer.layers.8.4.ff.2.weight": { "min": -0.4225960969924927, "max": 0.4839133322238922, "mean": 1.030291969073005e-06, "std": 0.035397231578826904, "sparsity": 0.0, "shape": [ 1024, 4096 ] }, "transformer.layers.8.4.ff.2.bias": { "min": -0.1520412415266037, "max": 0.043631311506032944, "mean": 4.209935286780819e-05, "std": 0.014901721850037575, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.9.1.g": { "min": 0.31559863686561584, "max": 0.686523973941803, "mean": 0.553006649017334, "std": 0.040904585272073746, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.9.2.to_q.weight": { "min": -0.20726847648620605, "max": 0.22089692950248718, "mean": 3.191033465554938e-05, "std": 0.03829946741461754, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.9.2.to_q.bias": { "min": -0.13833385705947876, "max": 0.11308565735816956, "mean": 2.6655456167645752e-05, "std": 0.025857754051685333, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.9.2.to_k.weight": { "min": -0.4046614170074463, "max": 0.37271684408187866, "mean": 2.56894181802636e-05, "std": 0.0381796769797802, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.9.2.to_k.bias": { "min": -3.7873597145080566, "max": 2.881237506866455, "mean": 0.0011979229748249054, "std": 0.5181517601013184, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.9.2.to_v.weight": { "min": -0.20434829592704773, "max": 0.19823738932609558, "mean": 2.9684193577850237e-05, "std": 0.03429735451936722, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.9.2.to_v.bias": { "min": -0.050780050456523895, "max": 0.040064383298158646, "mean": -0.00042128204950131476, "std": 0.01341989729553461, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.9.2.to_out.0.weight": { "min": -0.1970871537923813, "max": 0.20266157388687134, "mean": -1.2426969988155179e-05, "std": 0.031805407255887985, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.9.2.to_out.0.bias": { "min": -0.1938190907239914, "max": 0.19595396518707275, "mean": -0.0029727788642048836, "std": 0.06256895512342453, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.9.3.g": { "min": 0.34895268082618713, "max": 1.0913121700286865, "mean": 0.6674203276634216, "std": 0.056132975965738297, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.9.4.ff.0.0.weight": { "min": -0.22646191716194153, "max": 0.25265538692474365, "mean": 0.0003584488877095282, "std": 0.040759678930044174, "sparsity": 0.0, "shape": [ 4096, 1024 ] }, "transformer.layers.9.4.ff.0.0.bias": { "min": -0.09146817028522491, "max": 0.04364684969186783, "mean": -0.030097611248493195, "std": 0.017646051943302155, "sparsity": 0.0, "shape": [ 4096 ] }, "transformer.layers.9.4.ff.2.weight": { "min": -0.35469669103622437, "max": 0.30548718571662903, "mean": -4.469315172173083e-05, "std": 0.03712276369333267, "sparsity": 0.0, "shape": [ 1024, 4096 ] }, "transformer.layers.9.4.ff.2.bias": { "min": -0.1623995155096054, "max": 0.06374479830265045, "mean": -8.042766421567649e-05, "std": 0.01944616436958313, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.10.1.g": { "min": 0.34871119260787964, "max": 0.7271286249160767, "mean": 0.5425379872322083, "std": 0.03944627195596695, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.10.2.to_q.weight": { "min": -0.2201070785522461, "max": 0.2242431491613388, "mean": -1.1387233826098964e-05, "std": 0.03923100233078003, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.10.2.to_q.bias": { "min": -0.11890711635351181, "max": 0.1713198721408844, "mean": 0.0002833662729244679, "std": 0.025163158774375916, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.10.2.to_k.weight": { "min": -0.24783332645893097, "max": 0.30217495560646057, "mean": -3.6862991692032665e-05, "std": 0.038930460810661316, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.10.2.to_k.bias": { "min": -3.520315170288086, "max": 3.7306737899780273, "mean": 0.015852145850658417, "std": 0.7850235104560852, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.10.2.to_v.weight": { "min": -0.21981129050254822, "max": 0.23816066980361938, "mean": -1.3107633094477933e-05, "std": 0.036303482949733734, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.10.2.to_v.bias": { "min": -0.04740596562623978, "max": 0.05159047618508339, "mean": 0.000481397844851017, "std": 0.013528619892895222, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.10.2.to_out.0.weight": { "min": -0.2151964157819748, "max": 0.21832282841205597, "mean": 5.642603355227038e-05, "std": 0.03361587971448898, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.10.2.to_out.0.bias": { "min": -0.2122570425271988, "max": 0.23222938179969788, "mean": -0.005098365712910891, "std": 0.06190234050154686, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.10.3.g": { "min": 0.36193394660949707, "max": 1.1087924242019653, "mean": 0.6995820999145508, "std": 0.05450976639986038, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.10.4.ff.0.0.weight": { "min": -0.23606520891189575, "max": 0.24584993720054626, "mean": 0.00046336432569660246, "std": 0.041269052773714066, "sparsity": 0.0, "shape": [ 4096, 1024 ] }, "transformer.layers.10.4.ff.0.0.bias": { "min": -0.09852692484855652, "max": 0.06841564178466797, "mean": -0.0314490832388401, "std": 0.01816665753722191, "sparsity": 0.0, "shape": [ 4096 ] }, "transformer.layers.10.4.ff.2.weight": { "min": -0.30322569608688354, "max": 0.3532632291316986, "mean": -8.268543751910329e-05, "std": 0.04027474299073219, "sparsity": 0.0, "shape": [ 1024, 4096 ] }, "transformer.layers.10.4.ff.2.bias": { "min": -0.15293245017528534, "max": 0.1503082662820816, "mean": 0.0002610071678645909, "std": 0.023066464811563492, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.11.1.g": { "min": 0.9987825155258179, "max": 1.011022686958313, "mean": 1.0016167163848877, "std": 0.004121079575270414, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.11.2.to_q.weight": { "min": -0.03126484900712967, "max": 0.03125990182161331, "mean": -1.9292880097054876e-05, "std": 0.0180410947650671, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.11.2.to_q.bias": { "min": -0.031222796067595482, "max": 0.030990226194262505, "mean": -0.001084181945770979, "std": 0.017950553447008133, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.11.2.to_k.weight": { "min": -0.03126567602157593, "max": 0.031269483268260956, "mean": 3.546300376910949e-06, "std": 0.018041500821709633, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.11.2.to_k.bias": { "min": -0.03114791214466095, "max": 0.03117155283689499, "mean": 0.0003340535331517458, "std": 0.018062960356473923, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.11.2.to_v.weight": { "min": 0.0, "max": 0.0, "mean": 0.0, "std": 0.0, "sparsity": 1.0, "shape": [ 1024, 1024 ] }, "transformer.layers.11.2.to_v.bias": { "min": 0.0, "max": 0.0, "mean": 0.0, "std": 0.0, "sparsity": 1.0, "shape": [ 1024 ] }, "transformer.layers.11.2.to_out.0.weight": { "min": 0.0, "max": 0.0, "mean": 0.0, "std": 0.0, "sparsity": 1.0, "shape": [ 1024, 1024 ] }, "transformer.layers.11.2.to_out.0.bias": { "min": -0.0005971609498374164, "max": 0.0006745979771949351, "mean": 4.374485797598027e-06, "std": 0.0001794710842659697, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.11.3.g": { "min": 0.9978547096252441, "max": 1.0122681856155396, "mean": 1.0009429454803467, "std": 0.0034361695870757103, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.11.4.ff.0.0.weight": { "min": -0.03340178728103638, "max": 0.033508703112602234, "mean": -6.2318931668414734e-06, "std": 0.01804722100496292, "sparsity": 0.0, "shape": [ 4096, 1024 ] }, "transformer.layers.11.4.ff.0.0.bias": { "min": -0.03293577954173088, "max": 0.03327555954456329, "mean": -0.00015042479208204895, "std": 0.017954858019948006, "sparsity": 0.0, "shape": [ 4096 ] }, "transformer.layers.11.4.ff.2.weight": { "min": -0.00139134272467345, "max": 0.0014818700728937984, "mean": 1.7994759673456429e-06, "std": 0.0002722168283071369, "sparsity": 0.0, "shape": [ 1024, 4096 ] }, "transformer.layers.11.4.ff.2.bias": { "min": -0.0005520335980691016, "max": 0.0007331477245315909, "mean": 7.149023986130487e-06, "std": 0.0001629332109587267, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.12.1.g": { "min": 0.3833008110523224, "max": 0.7242851853370667, "mean": 0.5809347033500671, "std": 0.039344511926174164, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.12.2.to_q.weight": { "min": -0.2398604303598404, "max": 0.19741135835647583, "mean": 2.61208933807211e-05, "std": 0.037466324865818024, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.12.2.to_q.bias": { "min": -0.1193285658955574, "max": 0.16746975481510162, "mean": 0.0009843853767961264, "std": 0.027611562982201576, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.12.2.to_k.weight": { "min": -0.24755319952964783, "max": 0.5020493268966675, "mean": -5.023340054322034e-05, "std": 0.037623729556798935, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.12.2.to_k.bias": { "min": -3.959080934524536, "max": 3.785468339920044, "mean": -0.003608043771237135, "std": 0.6828969120979309, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.12.2.to_v.weight": { "min": -0.2280745655298233, "max": 0.25265711545944214, "mean": -1.1726486263796687e-05, "std": 0.037434931844472885, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.12.2.to_v.bias": { "min": -0.07189386337995529, "max": 0.08095899969339371, "mean": -0.0005116118700243533, "std": 0.015669817104935646, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.12.2.to_out.0.weight": { "min": -0.22852574288845062, "max": 0.2589001953601837, "mean": -2.8789245334337465e-05, "std": 0.035421740263700485, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.12.2.to_out.0.bias": { "min": -0.20139215886592865, "max": 0.21579185128211975, "mean": -0.005532890558242798, "std": 0.06838470697402954, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.12.3.g": { "min": 0.40495166182518005, "max": 1.1977423429489136, "mean": 0.7382426857948303, "std": 0.05618907883763313, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.12.4.ff.0.0.weight": { "min": -0.22189897298812866, "max": 0.24627524614334106, "mean": 0.0005210949457250535, "std": 0.0413360670208931, "sparsity": 0.0, "shape": [ 4096, 1024 ] }, "transformer.layers.12.4.ff.0.0.bias": { "min": -0.10370241105556488, "max": 0.024191563948988914, "mean": -0.03269057348370552, "std": 0.018939778208732605, "sparsity": 0.0, "shape": [ 4096 ] }, "transformer.layers.12.4.ff.2.weight": { "min": -0.45156151056289673, "max": 0.42444875836372375, "mean": -0.00043494877172634006, "std": 0.046896398067474365, "sparsity": 0.0, "shape": [ 1024, 4096 ] }, "transformer.layers.12.4.ff.2.bias": { "min": -0.25261297821998596, "max": 0.47218039631843567, "mean": 0.0032064011320471764, "std": 0.0446014478802681, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.13.0.weight": { "min": -0.3172667622566223, "max": 0.33354270458221436, "mean": -2.519888585084118e-05, "std": 0.021287826821208, "sparsity": 0.0, "shape": [ 1024, 2048 ] }, "transformer.layers.13.1.g": { "min": 0.3245178461074829, "max": 0.6904165148735046, "mean": 0.5711733102798462, "std": 0.04502657428383827, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.13.2.to_q.weight": { "min": -0.16521431505680084, "max": 0.1752052754163742, "mean": -4.8754882300272584e-05, "std": 0.033182479441165924, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.13.2.to_q.bias": { "min": -0.18773159384727478, "max": 0.14384877681732178, "mean": 3.672283492051065e-05, "std": 0.02975340373814106, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.13.2.to_k.weight": { "min": -0.38243839144706726, "max": 0.24725475907325745, "mean": -9.841056453296915e-06, "std": 0.03276367485523224, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.13.2.to_k.bias": { "min": -3.6714818477630615, "max": 3.3041720390319824, "mean": -0.014343326911330223, "std": 0.9862688779830933, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.13.2.to_v.weight": { "min": -0.23551921546459198, "max": 0.24833251535892487, "mean": -1.8171514966525137e-05, "std": 0.041698355227708817, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.13.2.to_v.bias": { "min": -0.07285058498382568, "max": 0.1551419198513031, "mean": 0.0006671739974990487, "std": 0.02518472634255886, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.13.2.to_out.0.weight": { "min": -0.26684004068374634, "max": 0.2486322820186615, "mean": -1.5217347026919015e-05, "std": 0.040139369666576385, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.13.2.to_out.0.bias": { "min": -0.19041800498962402, "max": 0.19548022747039795, "mean": -0.001239710720255971, "std": 0.06670945882797241, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.13.3.g": { "min": 0.3291718661785126, "max": 1.0067707300186157, "mean": 0.7195272445678711, "std": 0.053192976862192154, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.13.4.ff.0.0.weight": { "min": -0.23261909186840057, "max": 0.24629585444927216, "mean": 0.0001829106913646683, "std": 0.04090041667222977, "sparsity": 0.0, "shape": [ 4096, 1024 ] }, "transformer.layers.13.4.ff.0.0.bias": { "min": -0.11500220745801926, "max": 0.01902289316058159, "mean": -0.042502518743276596, "std": 0.01891784742474556, "sparsity": 0.0, "shape": [ 4096 ] }, "transformer.layers.13.4.ff.2.weight": { "min": -0.3915143311023712, "max": 0.4093465507030487, "mean": -2.1941355953458697e-05, "std": 0.04853365942835808, "sparsity": 0.0, "shape": [ 1024, 4096 ] }, "transformer.layers.13.4.ff.2.bias": { "min": -0.6959867477416992, "max": 0.41447487473487854, "mean": 0.0008487096056342125, "std": 0.06040440872311592, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.14.0.weight": { "min": -0.0013131406158208847, "max": 1.000697135925293, "mean": 0.00048820037045516074, "std": 0.022089475765824318, "sparsity": 0.0, "shape": [ 1024, 2048 ] }, "transformer.layers.14.1.g": { "min": 0.9987786412239075, "max": 1.0108789205551147, "mean": 1.0015242099761963, "std": 0.003978394437581301, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.14.2.to_q.weight": { "min": -0.03125932812690735, "max": 0.031260255724191666, "mean": -2.101710924762301e-05, "std": 0.018032435327768326, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.14.2.to_q.bias": { "min": -0.031216789036989212, "max": 0.0312344953417778, "mean": -0.0006770212785340846, "std": 0.017827019095420837, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.14.2.to_k.weight": { "min": -0.03126361221075058, "max": 0.03126442804932594, "mean": -8.826009434415027e-06, "std": 0.018031461164355278, "sparsity": 9.5367431640625e-07, "shape": [ 1024, 1024 ] }, "transformer.layers.14.2.to_k.bias": { "min": -0.031229715794324875, "max": 0.031247057020664215, "mean": -0.0007297845440916717, "std": 0.01794196106493473, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.14.2.to_v.weight": { "min": 0.0, "max": 0.0, "mean": 0.0, "std": 0.0, "sparsity": 1.0, "shape": [ 1024, 1024 ] }, "transformer.layers.14.2.to_v.bias": { "min": 0.0, "max": 0.0, "mean": 0.0, "std": 0.0, "sparsity": 1.0, "shape": [ 1024 ] }, "transformer.layers.14.2.to_out.0.weight": { "min": 0.0, "max": 0.0, "mean": 0.0, "std": 0.0, "sparsity": 1.0, "shape": [ 1024, 1024 ] }, "transformer.layers.14.2.to_out.0.bias": { "min": -0.0004946183180436492, "max": 0.00040109679684974253, "mean": -3.799516889557708e-06, "std": 0.00014799994823988527, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.14.3.g": { "min": 0.9972319006919861, "max": 1.0116411447525024, "mean": 1.0005743503570557, "std": 0.0034592244774103165, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.14.4.ff.0.0.weight": { "min": -0.03315997123718262, "max": 0.032729245722293854, "mean": -2.570214064689935e-06, "std": 0.018028665333986282, "sparsity": 0.0, "shape": [ 4096, 1024 ] }, "transformer.layers.14.4.ff.0.0.bias": { "min": -0.03235220909118652, "max": 0.03128715977072716, "mean": -0.00045961630530655384, "std": 0.018038177862763405, "sparsity": 0.0, "shape": [ 4096 ] }, "transformer.layers.14.4.ff.2.weight": { "min": -0.0016143623506650329, "max": 0.001427292707376182, "mean": -1.0927603852906032e-06, "std": 0.00026996160158887506, "sparsity": 0.0, "shape": [ 1024, 4096 ] }, "transformer.layers.14.4.ff.2.bias": { "min": -0.00045358933857642114, "max": 0.00036658692988567054, "mean": -3.5024249882553704e-06, "std": 0.0001358992449240759, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.15.0.weight": { "min": -0.23466038703918457, "max": 0.2728899419307709, "mean": 6.680695605609799e-06, "std": 0.018810251727700233, "sparsity": 0.0, "shape": [ 1024, 2048 ] }, "transformer.layers.15.1.g": { "min": 0.3215275704860687, "max": 0.6988651752471924, "mean": 0.5818086862564087, "std": 0.04628920555114746, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.15.2.to_q.weight": { "min": -0.18249788880348206, "max": 0.1985490918159485, "mean": -1.1619875294854864e-05, "std": 0.0331842340528965, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.15.2.to_q.bias": { "min": -0.16120854020118713, "max": 0.12988702952861786, "mean": -0.0010746754705905914, "std": 0.034188635647296906, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.15.2.to_k.weight": { "min": -0.3333602249622345, "max": 0.31210559606552124, "mean": -1.0246277270198334e-05, "std": 0.03223477676510811, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.15.2.to_k.bias": { "min": -7.836638927459717, "max": 8.800041198730469, "mean": 0.09370891749858856, "std": 1.6243042945861816, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.15.2.to_v.weight": { "min": -0.23471659421920776, "max": 0.24255934357643127, "mean": 4.1660623537609354e-05, "std": 0.04085636883974075, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.15.2.to_v.bias": { "min": -0.07628928869962692, "max": 0.06604960560798645, "mean": 0.0004821753827854991, "std": 0.01943657174706459, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.15.2.to_out.0.weight": { "min": -0.24707570672035217, "max": 0.2350512593984604, "mean": -3.330966137582436e-06, "std": 0.03943110629916191, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.15.2.to_out.0.bias": { "min": -0.16370660066604614, "max": 0.16159522533416748, "mean": 0.0016214787028729916, "std": 0.06530040502548218, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.15.3.g": { "min": 0.556998610496521, "max": 0.9505069851875305, "mean": 0.7131754159927368, "std": 0.04095931351184845, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.15.4.ff.0.0.weight": { "min": -0.22923102974891663, "max": 0.25587573647499084, "mean": -4.568279109662399e-05, "std": 0.040574610233306885, "sparsity": 0.0, "shape": [ 4096, 1024 ] }, "transformer.layers.15.4.ff.0.0.bias": { "min": -0.13533200323581696, "max": 0.022116411477327347, "mean": -0.041375163942575455, "std": 0.018435189500451088, "sparsity": 0.0, "shape": [ 4096 ] }, "transformer.layers.15.4.ff.2.weight": { "min": -0.42361417412757874, "max": 0.39315521717071533, "mean": -4.420744517119601e-06, "std": 0.047783900052309036, "sparsity": 0.0, "shape": [ 1024, 4096 ] }, "transformer.layers.15.4.ff.2.bias": { "min": -0.6098850965499878, "max": 0.6541793942451477, "mean": 0.001589474268257618, "std": 0.056938592344522476, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.16.0.weight": { "min": -0.2520405650138855, "max": 0.3211195170879364, "mean": -6.1747768995701335e-06, "std": 0.019613485783338547, "sparsity": 0.0, "shape": [ 1024, 2048 ] }, "transformer.layers.16.1.g": { "min": 0.35947033762931824, "max": 0.6870434284210205, "mean": 0.5708057880401611, "std": 0.04320356622338295, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.16.2.to_q.weight": { "min": -0.22096499800682068, "max": 0.1776382476091385, "mean": -3.44411309924908e-05, "std": 0.034298770129680634, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.16.2.to_q.bias": { "min": -0.16386361420154572, "max": 0.23379802703857422, "mean": 0.0003647217818070203, "std": 0.032876912504434586, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.16.2.to_k.weight": { "min": -0.2648993730545044, "max": 0.2407570779323578, "mean": -5.283746577333659e-05, "std": 0.03389748930931091, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.16.2.to_k.bias": { "min": -4.875531196594238, "max": 5.112789630889893, "mean": 0.04403312876820564, "std": 1.231998324394226, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.16.2.to_v.weight": { "min": -0.24717208743095398, "max": 0.2512055039405823, "mean": 7.22141849109903e-05, "std": 0.043986547738313675, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.16.2.to_v.bias": { "min": -0.06276638805866241, "max": 0.054656121879816055, "mean": 0.0006459522992372513, "std": 0.017198164016008377, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.16.2.to_out.0.weight": { "min": -0.2877632677555084, "max": 0.2726806104183197, "mean": -5.0024795200442895e-05, "std": 0.042984671890735626, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.16.2.to_out.0.bias": { "min": -0.16170376539230347, "max": 0.1710934340953827, "mean": -0.0028864555060863495, "std": 0.05931045860052109, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.16.3.g": { "min": 0.51991868019104, "max": 0.9398472905158997, "mean": 0.7137647867202759, "std": 0.03922666609287262, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.16.4.ff.0.0.weight": { "min": -0.23831400275230408, "max": 0.2492961287498474, "mean": 0.00046471404493786395, "std": 0.040453460067510605, "sparsity": 0.0, "shape": [ 4096, 1024 ] }, "transformer.layers.16.4.ff.0.0.bias": { "min": -0.14562036097049713, "max": 0.04111756384372711, "mean": -0.039718322455883026, "std": 0.02059181034564972, "sparsity": 0.0, "shape": [ 4096 ] }, "transformer.layers.16.4.ff.2.weight": { "min": -0.5351076126098633, "max": 0.5854408740997314, "mean": 5.962188879493624e-06, "std": 0.0488593615591526, "sparsity": 0.0, "shape": [ 1024, 4096 ] }, "transformer.layers.16.4.ff.2.bias": { "min": -0.5212635397911072, "max": 0.4954894483089447, "mean": 0.0023677186109125614, "std": 0.05354826897382736, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.17.0.weight": { "min": -0.27395325899124146, "max": 0.31585943698883057, "mean": 1.8985367660206975e-06, "std": 0.020050065591931343, "sparsity": 0.0, "shape": [ 1024, 2048 ] }, "transformer.layers.17.1.g": { "min": 0.3660656809806824, "max": 0.7167491316795349, "mean": 0.593307375907898, "std": 0.04627520218491554, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.17.2.to_q.weight": { "min": -0.21157211065292358, "max": 0.19981449842453003, "mean": 3.063139592995867e-05, "std": 0.03486718237400055, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.17.2.to_q.bias": { "min": -0.1879485547542572, "max": 0.2043510377407074, "mean": 0.0009530138340778649, "std": 0.031568389385938644, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.17.2.to_k.weight": { "min": -0.29089149832725525, "max": 0.341105580329895, "mean": -4.692538641393185e-05, "std": 0.03458765521645546, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.17.2.to_k.bias": { "min": -3.893813371658325, "max": 3.4017703533172607, "mean": 0.014513500966131687, "std": 0.8598799705505371, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.17.2.to_v.weight": { "min": -0.22526344656944275, "max": 0.250789076089859, "mean": -3.7296154005161952e-06, "std": 0.042229536920785904, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.17.2.to_v.bias": { "min": -0.05549817904829979, "max": 0.046731892973184586, "mean": -2.1666113752871752e-05, "std": 0.0158494021743536, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.17.2.to_out.0.weight": { "min": -0.29372450709342957, "max": 0.2908160388469696, "mean": -7.59748127165949e-06, "std": 0.041944604367017746, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.17.2.to_out.0.bias": { "min": -0.12536406517028809, "max": 0.2601471245288849, "mean": -0.0032426435500383377, "std": 0.05318090319633484, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.17.3.g": { "min": 0.45628464221954346, "max": 0.8507043719291687, "mean": 0.7057910561561584, "std": 0.03590774908661842, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.17.4.ff.0.0.weight": { "min": -0.5123029351234436, "max": 0.34838762879371643, "mean": 0.0003429077914915979, "std": 0.04019884020090103, "sparsity": 0.0, "shape": [ 4096, 1024 ] }, "transformer.layers.17.4.ff.0.0.bias": { "min": -0.1866319328546524, "max": 0.039536003023386, "mean": -0.03940858319401741, "std": 0.021406862884759903, "sparsity": 0.0, "shape": [ 4096 ] }, "transformer.layers.17.4.ff.2.weight": { "min": -0.5465707778930664, "max": 0.5584931969642639, "mean": -7.126475975383073e-05, "std": 0.050734348595142365, "sparsity": 0.0, "shape": [ 1024, 4096 ] }, "transformer.layers.17.4.ff.2.bias": { "min": -0.5138925909996033, "max": 0.6670938730239868, "mean": 0.0024418262764811516, "std": 0.04960782080888748, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.18.0.weight": { "min": -0.33276569843292236, "max": 0.26628994941711426, "mean": 3.292404471721966e-06, "std": 0.01938711293041706, "sparsity": 0.0, "shape": [ 1024, 2048 ] }, "transformer.layers.18.1.g": { "min": 0.3219706416130066, "max": 0.7718862295150757, "mean": 0.651161789894104, "std": 0.04554183781147003, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.18.2.to_q.weight": { "min": -0.2507328987121582, "max": 0.22062398493289948, "mean": -2.0154016056039836e-06, "std": 0.03650148585438728, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.18.2.to_q.bias": { "min": -0.3283964991569519, "max": 0.2880261540412903, "mean": -0.0006875221151858568, "std": 0.038663797080516815, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.18.2.to_k.weight": { "min": -0.3113596737384796, "max": 0.37169572710990906, "mean": 6.504646444227546e-05, "std": 0.03624209389090538, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.18.2.to_k.bias": { "min": -4.737742900848389, "max": 5.83281946182251, "mean": 0.03801126033067703, "std": 1.4163931608200073, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.18.2.to_v.weight": { "min": -0.2227693796157837, "max": 0.2069622278213501, "mean": -7.526973786298186e-05, "std": 0.042485106736421585, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.18.2.to_v.bias": { "min": -0.07798711210489273, "max": 0.05173616483807564, "mean": -0.0009264935506507754, "std": 0.016420088708400726, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.18.2.to_out.0.weight": { "min": -0.3309612274169922, "max": 0.3296358287334442, "mean": -4.774779426952591e-06, "std": 0.04279141128063202, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.18.2.to_out.0.bias": { "min": -0.28600984811782837, "max": 0.11250722408294678, "mean": -0.0012054404942318797, "std": 0.04702861234545708, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.18.3.g": { "min": 0.4860813617706299, "max": 0.8933811783790588, "mean": 0.7376744747161865, "std": 0.038892824202775955, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.18.4.ff.0.0.weight": { "min": -0.36275342106819153, "max": 0.2756327986717224, "mean": 5.113358929520473e-05, "std": 0.04064434394240379, "sparsity": 0.0, "shape": [ 4096, 1024 ] }, "transformer.layers.18.4.ff.0.0.bias": { "min": -0.2486657202243805, "max": 0.046376701444387436, "mean": -0.03928756341338158, "std": 0.023350302129983902, "sparsity": 0.0, "shape": [ 4096 ] }, "transformer.layers.18.4.ff.2.weight": { "min": -0.6290910840034485, "max": 0.5994174480438232, "mean": -6.010006836731918e-05, "std": 0.0531165786087513, "sparsity": 0.0, "shape": [ 1024, 4096 ] }, "transformer.layers.18.4.ff.2.bias": { "min": -0.712557315826416, "max": 0.26695698499679565, "mean": 0.000916715245693922, "std": 0.051312319934368134, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.19.0.weight": { "min": -0.3435560464859009, "max": 0.3038403391838074, "mean": 2.054806600426673e-07, "std": 0.01913570426404476, "sparsity": 0.0, "shape": [ 1024, 2048 ] }, "transformer.layers.19.1.g": { "min": 0.34980928897857666, "max": 0.7884078621864319, "mean": 0.6389412879943848, "std": 0.04949204996228218, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.19.2.to_q.weight": { "min": -0.2064303159713745, "max": 0.2077268660068512, "mean": -5.987969052512199e-05, "std": 0.03769605979323387, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.19.2.to_q.bias": { "min": -0.25974684953689575, "max": 0.26921483874320984, "mean": -0.000399288343032822, "std": 0.04469470679759979, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.19.2.to_k.weight": { "min": -0.35545018315315247, "max": 0.32378923892974854, "mean": -6.928052243893035e-06, "std": 0.03720466047525406, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.19.2.to_k.bias": { "min": -5.283975601196289, "max": 4.222393035888672, "mean": -0.0264443326741457, "std": 1.0090056657791138, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.19.2.to_v.weight": { "min": -0.23976586759090424, "max": 0.24442994594573975, "mean": -2.508235047571361e-05, "std": 0.04320976510643959, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.19.2.to_v.bias": { "min": -0.06259545683860779, "max": 0.0569254532456398, "mean": 0.00034189436701126397, "std": 0.014161717146635056, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.19.2.to_out.0.weight": { "min": -0.4372391402721405, "max": 0.37368500232696533, "mean": 1.4562616343027912e-05, "std": 0.044121067970991135, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.19.2.to_out.0.bias": { "min": -0.09685619175434113, "max": 0.17668433487415314, "mean": -0.0006592039717361331, "std": 0.035167545080184937, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.19.3.g": { "min": 0.42172640562057495, "max": 1.0772342681884766, "mean": 0.7485133409500122, "std": 0.04247161000967026, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.19.4.ff.0.0.weight": { "min": -0.26711001992225647, "max": 0.2980104982852936, "mean": -7.953734166221693e-05, "std": 0.04080444946885109, "sparsity": 0.0, "shape": [ 4096, 1024 ] }, "transformer.layers.19.4.ff.0.0.bias": { "min": -0.18652470409870148, "max": 0.04387153312563896, "mean": -0.03684595599770546, "std": 0.025674043223261833, "sparsity": 0.0, "shape": [ 4096 ] }, "transformer.layers.19.4.ff.2.weight": { "min": -0.4576263427734375, "max": 0.488967627286911, "mean": 4.3991476559313014e-05, "std": 0.05420954152941704, "sparsity": 0.0, "shape": [ 1024, 4096 ] }, "transformer.layers.19.4.ff.2.bias": { "min": -0.287752240896225, "max": 0.5537111759185791, "mean": -0.0008832515450194478, "std": 0.0479048416018486, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.20.0.weight": { "min": -0.29307857155799866, "max": 0.32305020093917847, "mean": 6.496340574813075e-06, "std": 0.01996980607509613, "sparsity": 0.0, "shape": [ 1024, 2048 ] }, "transformer.layers.20.1.g": { "min": 0.29093778133392334, "max": 0.7654404640197754, "mean": 0.6508903503417969, "std": 0.05225415527820587, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.20.2.to_q.weight": { "min": -0.2440621256828308, "max": 0.26225581765174866, "mean": -5.966384833300253e-06, "std": 0.03961286321282387, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.20.2.to_q.bias": { "min": -0.268706738948822, "max": 0.20074717700481415, "mean": -0.0008819116046652198, "std": 0.05185216665267944, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.20.2.to_k.weight": { "min": -0.2733410894870758, "max": 0.2549380958080292, "mean": 4.216280103719328e-06, "std": 0.03870992735028267, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.20.2.to_k.bias": { "min": -13.020317077636719, "max": 16.015220642089844, "mean": 0.033375781029462814, "std": 1.9953062534332275, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.20.2.to_v.weight": { "min": -0.2079249769449234, "max": 0.22674520313739777, "mean": -7.217413804028183e-05, "std": 0.04055381566286087, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.20.2.to_v.bias": { "min": -0.06965012848377228, "max": 0.06350152939558029, "mean": 0.00015418700058944523, "std": 0.014755439944565296, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.20.2.to_out.0.weight": { "min": -0.4655463695526123, "max": 0.3209993243217468, "mean": 1.953401260834653e-05, "std": 0.04058877378702164, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.20.2.to_out.0.bias": { "min": -0.06434516608715057, "max": 0.1157260537147522, "mean": 0.001194344600662589, "std": 0.02471684291958809, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.20.3.g": { "min": 0.37466296553611755, "max": 0.9391067624092102, "mean": 0.7509991526603699, "std": 0.04050418362021446, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.20.4.ff.0.0.weight": { "min": -0.28077101707458496, "max": 0.274548202753067, "mean": -0.00016862244228832424, "std": 0.04099500924348831, "sparsity": 0.0, "shape": [ 4096, 1024 ] }, "transformer.layers.20.4.ff.0.0.bias": { "min": -0.19967925548553467, "max": 0.0508696548640728, "mean": -0.03204797953367233, "std": 0.025167953222990036, "sparsity": 0.0, "shape": [ 4096 ] }, "transformer.layers.20.4.ff.2.weight": { "min": -0.6607509851455688, "max": 0.5379750728607178, "mean": -4.8667719966033474e-05, "std": 0.052846137434244156, "sparsity": 0.0, "shape": [ 1024, 4096 ] }, "transformer.layers.20.4.ff.2.bias": { "min": -0.1939390003681183, "max": 0.584657609462738, "mean": -0.0005122774746268988, "std": 0.041145551949739456, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.21.0.weight": { "min": -0.41793951392173767, "max": 0.37214717268943787, "mean": 6.048314844520064e-06, "std": 0.02162175066769123, "sparsity": 0.0, "shape": [ 1024, 2048 ] }, "transformer.layers.21.1.g": { "min": 0.21421198546886444, "max": 0.7522769570350647, "mean": 0.6496115922927856, "std": 0.054447393864393234, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.21.2.to_q.weight": { "min": -0.21056805551052094, "max": 0.1966959536075592, "mean": 4.008851828984916e-05, "std": 0.039464544504880905, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.21.2.to_q.bias": { "min": -0.33072784543037415, "max": 0.26050281524658203, "mean": -0.003235320094972849, "std": 0.056362900882959366, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.21.2.to_k.weight": { "min": -0.20648598670959473, "max": 0.2557448148727417, "mean": 5.435877392301336e-05, "std": 0.038566704839468, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.21.2.to_k.bias": { "min": -6.270581245422363, "max": 6.962486743927002, "mean": 0.048468317836523056, "std": 1.3885526657104492, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.21.2.to_v.weight": { "min": -0.21042834222316742, "max": 0.23116129636764526, "mean": -5.202562988415593e-06, "std": 0.04131306707859039, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.21.2.to_v.bias": { "min": -0.044061992317438126, "max": 0.03610403463244438, "mean": 4.031957359984517e-06, "std": 0.012803297489881516, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.21.2.to_out.0.weight": { "min": -0.39820992946624756, "max": 0.3451625406742096, "mean": -5.5655600590398535e-05, "std": 0.04238949343562126, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.21.2.to_out.0.bias": { "min": -0.05527956411242485, "max": 0.06314276903867722, "mean": 0.00036968549829907715, "std": 0.01868215762078762, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.21.3.g": { "min": 0.3502121865749359, "max": 1.0526388883590698, "mean": 0.789475679397583, "std": 0.049056656658649445, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.21.4.ff.0.0.weight": { "min": -0.333749383687973, "max": 0.386434406042099, "mean": -0.00016950398276094347, "std": 0.04148067533969879, "sparsity": 0.0, "shape": [ 4096, 1024 ] }, "transformer.layers.21.4.ff.0.0.bias": { "min": -0.15795546770095825, "max": 0.05914008617401123, "mean": -0.031855080276727676, "std": 0.025188777595758438, "sparsity": 0.0, "shape": [ 4096 ] }, "transformer.layers.21.4.ff.2.weight": { "min": -0.6976608633995056, "max": 0.4709860682487488, "mean": -9.084228804567829e-05, "std": 0.051792342215776443, "sparsity": 0.0, "shape": [ 1024, 4096 ] }, "transformer.layers.21.4.ff.2.bias": { "min": -0.24932992458343506, "max": 0.3299875855445862, "mean": -0.00024624879006296396, "std": 0.04149326682090759, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.22.0.weight": { "min": -0.2875395119190216, "max": 0.3506205677986145, "mean": -2.1794317035528366e-06, "std": 0.02423883229494095, "sparsity": 0.0, "shape": [ 1024, 2048 ] }, "transformer.layers.22.1.g": { "min": 0.19665004312992096, "max": 0.7845895886421204, "mean": 0.6703099608421326, "std": 0.05872485041618347, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.22.2.to_q.weight": { "min": -0.22986678779125214, "max": 0.23209868371486664, "mean": -1.9775907276198268e-05, "std": 0.040440451353788376, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.22.2.to_q.bias": { "min": -0.22065043449401855, "max": 0.2417624443769455, "mean": 0.0007816089782863855, "std": 0.05589631199836731, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.22.2.to_k.weight": { "min": -0.21658743917942047, "max": 0.22758929431438446, "mean": -7.156423816923052e-05, "std": 0.03937661275267601, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.22.2.to_k.bias": { "min": -8.943953514099121, "max": 9.107547760009766, "mean": -0.0012157298624515533, "std": 1.8536982536315918, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.22.2.to_v.weight": { "min": -0.2707418203353882, "max": 0.2602587938308716, "mean": 4.357028228696436e-05, "std": 0.03840764984488487, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.22.2.to_v.bias": { "min": -0.05789529159665108, "max": 0.05795900523662567, "mean": 0.0003505878266878426, "std": 0.014736429788172245, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.22.2.to_out.0.weight": { "min": -0.2662392258644104, "max": 0.2892150580883026, "mean": -6.152272544568405e-05, "std": 0.03907401114702225, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.22.2.to_out.0.bias": { "min": -0.04396943002939224, "max": 0.037484679371118546, "mean": -8.678687299834564e-05, "std": 0.013375459238886833, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.22.3.g": { "min": 0.3395363390445709, "max": 1.100338101387024, "mean": 0.863823413848877, "std": 0.06409083306789398, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.22.4.ff.0.0.weight": { "min": -0.423621267080307, "max": 0.4195392429828644, "mean": 0.0003127713571302593, "std": 0.04350290074944496, "sparsity": 0.0, "shape": [ 4096, 1024 ] }, "transformer.layers.22.4.ff.0.0.bias": { "min": -0.21570223569869995, "max": 0.17136934399604797, "mean": -0.029504353180527687, "std": 0.032010503113269806, "sparsity": 0.0, "shape": [ 4096 ] }, "transformer.layers.22.4.ff.2.weight": { "min": -0.602144181728363, "max": 0.5620326995849609, "mean": -0.00015219957276713103, "std": 0.05344673991203308, "sparsity": 0.0, "shape": [ 1024, 4096 ] }, "transformer.layers.22.4.ff.2.bias": { "min": -0.17926719784736633, "max": 0.37834614515304565, "mean": 0.0013675567461177707, "std": 0.037359848618507385, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.23.0.weight": { "min": -0.39466091990470886, "max": 0.36930760741233826, "mean": 3.647102857939899e-05, "std": 0.028620684519410133, "sparsity": 0.0, "shape": [ 1024, 2048 ] }, "transformer.layers.23.1.g": { "min": 0.2902662754058838, "max": 0.832281231880188, "mean": 0.7056034207344055, "std": 0.06793806701898575, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.23.2.to_q.weight": { "min": -0.9263004064559937, "max": 1.0266234874725342, "mean": -2.5708328394102864e-05, "std": 0.04762601479887962, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.23.2.to_q.bias": { "min": -0.8822629451751709, "max": 0.8186339139938354, "mean": -0.00031781112193129957, "std": 0.09582255780696869, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.23.2.to_k.weight": { "min": -0.27002349495887756, "max": 0.24192620813846588, "mean": -2.2872980480315164e-05, "std": 0.03895563259720802, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.23.2.to_k.bias": { "min": -23.84510040283203, "max": 22.94961166381836, "mean": -0.09204111993312836, "std": 4.085866928100586, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.23.2.to_v.weight": { "min": -0.22870811820030212, "max": 0.24587669968605042, "mean": -2.573069286881946e-05, "std": 0.03863922879099846, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.23.2.to_v.bias": { "min": -0.06067140772938728, "max": 0.046225275844335556, "mean": -0.0001460441417293623, "std": 0.014704843983054161, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.23.2.to_out.0.weight": { "min": -0.3391576111316681, "max": 0.3760104775428772, "mean": 7.383272532024421e-06, "std": 0.040815357118844986, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.23.2.to_out.0.bias": { "min": -0.04665788635611534, "max": 0.19654953479766846, "mean": 0.0002728282706812024, "std": 0.013587887398898602, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.23.3.g": { "min": 0.37436628341674805, "max": 1.138013482093811, "mean": 0.8901113271713257, "std": 0.06415355205535889, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.23.4.ff.0.0.weight": { "min": -0.44819676876068115, "max": 0.5436740517616272, "mean": 2.450778629281558e-05, "std": 0.04556773602962494, "sparsity": 0.0, "shape": [ 4096, 1024 ] }, "transformer.layers.23.4.ff.0.0.bias": { "min": -0.2250596135854721, "max": 0.08822774887084961, "mean": -0.03204711154103279, "std": 0.0378473699092865, "sparsity": 0.0, "shape": [ 4096 ] }, "transformer.layers.23.4.ff.2.weight": { "min": -0.7285163402557373, "max": 0.6922004222869873, "mean": 3.462535823928192e-05, "std": 0.051778655499219894, "sparsity": 0.0, "shape": [ 1024, 4096 ] }, "transformer.layers.23.4.ff.2.bias": { "min": -0.1753203570842743, "max": 0.21950407326221466, "mean": 4.071232979185879e-05, "std": 0.0318208709359169, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.24.0.weight": { "min": -0.34123340249061584, "max": 0.37526530027389526, "mean": 4.290333163226023e-05, "std": 0.0341440849006176, "sparsity": 0.0, "shape": [ 1024, 2048 ] }, "transformer.layers.24.1.g": { "min": 0.31759148836135864, "max": 1.2954586744308472, "mean": 0.6016563177108765, "std": 0.08407581597566605, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.24.2.to_q.weight": { "min": -0.2837989628314972, "max": 0.2609255015850067, "mean": -3.0735166092199506e-06, "std": 0.035984087735414505, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.24.2.to_q.bias": { "min": -0.23655052483081818, "max": 0.2062867432832718, "mean": 0.0002321804640814662, "std": 0.05606939643621445, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.24.2.to_k.weight": { "min": -0.436277836561203, "max": 0.3261794447898865, "mean": 2.4473378289258108e-05, "std": 0.03413478285074234, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.24.2.to_k.bias": { "min": -5.569121360778809, "max": 7.344529628753662, "mean": -0.007453735917806625, "std": 0.7020133137702942, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.24.2.to_v.weight": { "min": -0.3451450765132904, "max": 0.36535224318504333, "mean": 0.0001032469590427354, "std": 0.047828368842601776, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.24.2.to_v.bias": { "min": -0.07407404482364655, "max": 0.06063373386859894, "mean": 0.0009325749706476927, "std": 0.014960682019591331, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.24.2.to_out.0.weight": { "min": -0.25645625591278076, "max": 0.28786128759384155, "mean": 4.184576027910225e-06, "std": 0.041555535048246384, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.24.2.to_out.0.bias": { "min": -0.05557131767272949, "max": 0.06310223042964935, "mean": 0.00014075382205192, "std": 0.0071859210729599, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.24.3.g": { "min": 0.4938402473926544, "max": 1.2290534973144531, "mean": 1.0134642124176025, "std": 0.1175011619925499, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.24.4.ff.0.0.weight": { "min": -1.0939291715621948, "max": 1.0472568273544312, "mean": -4.937269113725051e-05, "std": 0.052410781383514404, "sparsity": 0.0, "shape": [ 4096, 1024 ] }, "transformer.layers.24.4.ff.0.0.bias": { "min": -0.22465373575687408, "max": 0.17359215021133423, "mean": -0.027279244735836983, "std": 0.0364469476044178, "sparsity": 0.0, "shape": [ 4096 ] }, "transformer.layers.24.4.ff.2.weight": { "min": -0.8881030678749084, "max": 0.9261159300804138, "mean": -0.00014599041605833918, "std": 0.05328277125954628, "sparsity": 0.0, "shape": [ 1024, 4096 ] }, "transformer.layers.24.4.ff.2.bias": { "min": -0.17176949977874756, "max": 0.3815639615058899, "mean": 0.003376794047653675, "std": 0.03997529670596123, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.25.0.weight": { "min": -0.7789531350135803, "max": 0.725176990032196, "mean": 1.8912758605438285e-05, "std": 0.04616439342498779, "sparsity": 0.0, "shape": [ 1024, 2048 ] }, "transformer.layers.25.1.g": { "min": 0.3386198878288269, "max": 1.43718421459198, "mean": 0.9484164714813232, "std": 0.2068886160850525, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.25.2.to_q.weight": { "min": -1.7457443475723267, "max": 1.7046759128570557, "mean": 0.00022706578602083027, "std": 0.15868695080280304, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.25.2.to_q.bias": { "min": -1.2048320770263672, "max": 1.1044596433639526, "mean": -0.009567854925990105, "std": 0.20464132726192474, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.25.2.to_k.weight": { "min": -0.4219454526901245, "max": 0.42726483941078186, "mean": 6.450812361435965e-05, "std": 0.04801829159259796, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.25.2.to_k.bias": { "min": -19.830074310302734, "max": 19.624286651611328, "mean": -0.24912264943122864, "std": 4.795468807220459, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.25.2.to_v.weight": { "min": -0.32499611377716064, "max": 0.43987926840782166, "mean": -1.1840356819448061e-05, "std": 0.04616156592965126, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.25.2.to_v.bias": { "min": -0.034201864153146744, "max": 0.03727949783205986, "mean": 0.0006420350982807577, "std": 0.012923939153552055, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.25.2.to_out.0.weight": { "min": -0.7049213647842407, "max": 0.6658478379249573, "mean": 4.366881330497563e-05, "std": 0.057883720844984055, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.25.2.to_out.0.bias": { "min": -0.07255180925130844, "max": 0.06780894845724106, "mean": -0.00013478109030984342, "std": 0.012948636896908283, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.25.3.g": { "min": 0.38018205761909485, "max": 1.3912252187728882, "mean": 1.0665678977966309, "std": 0.21972529590129852, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.25.4.ff.0.0.weight": { "min": -0.6171136498451233, "max": 0.7182933688163757, "mean": 0.00011123980220872909, "std": 0.05802140384912491, "sparsity": 0.0, "shape": [ 4096, 1024 ] }, "transformer.layers.25.4.ff.0.0.bias": { "min": -0.22050145268440247, "max": 0.2261514961719513, "mean": 0.006267528980970383, "std": 0.04982294142246246, "sparsity": 0.0, "shape": [ 4096 ] }, "transformer.layers.25.4.ff.2.weight": { "min": -0.6300009489059448, "max": 0.8896978497505188, "mean": 1.1602171070990153e-05, "std": 0.023528659716248512, "sparsity": 0.0, "shape": [ 1024, 4096 ] }, "transformer.layers.25.4.ff.2.bias": { "min": -0.5090406537055969, "max": 0.47603797912597656, "mean": -0.003031304571777582, "std": 0.0695611834526062, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.norm_out.g": { "min": 0.5378094911575317, "max": 1.184032917022705, "mean": 0.7829163670539856, "std": 0.09918713569641113, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.proj_out.weight": { "min": -0.26840853691101074, "max": 0.21375010907649994, "mean": -0.00022396638814825565, "std": 0.05399699881672859, "sparsity": 0.0, "shape": [ 100, 1024 ] }, "transformer.proj_out.bias": { "min": -0.23899979889392853, "max": 0.014829290099442005, "mean": -0.04399246349930763, "std": 0.034442439675331116, "sparsity": 0.0, "shape": [ 100 ] } } }