diff --git "a/model_analysis.json" "b/model_analysis.json" new file mode 100644--- /dev/null +++ "b/model_analysis.json" @@ -0,0 +1,4683 @@ +{ + "layer_types": { + "transformer": 391 + }, + "parameter_counts": { + "transformer.time_embed.time_mlp.0.weight": 262144, + "transformer.time_embed.time_mlp.0.bias": 1024, + "transformer.time_embed.time_mlp.2.weight": 1048576, + "transformer.time_embed.time_mlp.2.bias": 1024, + "transformer.text_embed.text_embed.weight": 254600, + "transformer.input_embed.proj.weight": 307200, + "transformer.input_embed.proj.bias": 1024, + "transformer.input_embed.conv_pos_embed.conv1d.0.weight": 2031616, + "transformer.input_embed.conv_pos_embed.conv1d.0.bias": 1024, + "transformer.input_embed.conv_pos_embed.conv1d.2.weight": 2031616, + "transformer.input_embed.conv_pos_embed.conv1d.2.bias": 1024, + "transformer.layers.0.1.g": 1024, + "transformer.layers.0.2.to_q.weight": 1048576, + "transformer.layers.0.2.to_q.bias": 1024, + "transformer.layers.0.2.to_k.weight": 1048576, + "transformer.layers.0.2.to_k.bias": 1024, + "transformer.layers.0.2.to_v.weight": 1048576, + "transformer.layers.0.2.to_v.bias": 1024, + "transformer.layers.0.2.to_out.0.weight": 1048576, + "transformer.layers.0.2.to_out.0.bias": 1024, + "transformer.layers.0.3.g": 1024, + "transformer.layers.0.4.ff.0.0.weight": 4194304, + "transformer.layers.0.4.ff.0.0.bias": 4096, + "transformer.layers.0.4.ff.2.weight": 4194304, + "transformer.layers.0.4.ff.2.bias": 1024, + "transformer.layers.1.1.g": 1024, + "transformer.layers.1.2.to_q.weight": 1048576, + "transformer.layers.1.2.to_q.bias": 1024, + "transformer.layers.1.2.to_k.weight": 1048576, + "transformer.layers.1.2.to_k.bias": 1024, + "transformer.layers.1.2.to_v.weight": 1048576, + "transformer.layers.1.2.to_v.bias": 1024, + "transformer.layers.1.2.to_out.0.weight": 1048576, + "transformer.layers.1.2.to_out.0.bias": 1024, + "transformer.layers.1.3.g": 1024, + "transformer.layers.1.4.ff.0.0.weight": 4194304, + "transformer.layers.1.4.ff.0.0.bias": 4096, + "transformer.layers.1.4.ff.2.weight": 4194304, + "transformer.layers.1.4.ff.2.bias": 1024, + "transformer.layers.2.1.g": 1024, + "transformer.layers.2.2.to_q.weight": 1048576, + "transformer.layers.2.2.to_q.bias": 1024, + "transformer.layers.2.2.to_k.weight": 1048576, + "transformer.layers.2.2.to_k.bias": 1024, + "transformer.layers.2.2.to_v.weight": 1048576, + "transformer.layers.2.2.to_v.bias": 1024, + "transformer.layers.2.2.to_out.0.weight": 1048576, + "transformer.layers.2.2.to_out.0.bias": 1024, + "transformer.layers.2.3.g": 1024, + "transformer.layers.2.4.ff.0.0.weight": 4194304, + "transformer.layers.2.4.ff.0.0.bias": 4096, + "transformer.layers.2.4.ff.2.weight": 4194304, + "transformer.layers.2.4.ff.2.bias": 1024, + "transformer.layers.3.1.g": 1024, + "transformer.layers.3.2.to_q.weight": 1048576, + "transformer.layers.3.2.to_q.bias": 1024, + "transformer.layers.3.2.to_k.weight": 1048576, + "transformer.layers.3.2.to_k.bias": 1024, + "transformer.layers.3.2.to_v.weight": 1048576, + "transformer.layers.3.2.to_v.bias": 1024, + "transformer.layers.3.2.to_out.0.weight": 1048576, + "transformer.layers.3.2.to_out.0.bias": 1024, + "transformer.layers.3.3.g": 1024, + "transformer.layers.3.4.ff.0.0.weight": 4194304, + "transformer.layers.3.4.ff.0.0.bias": 4096, + "transformer.layers.3.4.ff.2.weight": 4194304, + "transformer.layers.3.4.ff.2.bias": 1024, + "transformer.layers.4.1.g": 1024, + "transformer.layers.4.2.to_q.weight": 1048576, + "transformer.layers.4.2.to_q.bias": 1024, + "transformer.layers.4.2.to_k.weight": 1048576, + "transformer.layers.4.2.to_k.bias": 1024, + "transformer.layers.4.2.to_v.weight": 1048576, + "transformer.layers.4.2.to_v.bias": 1024, + "transformer.layers.4.2.to_out.0.weight": 1048576, + "transformer.layers.4.2.to_out.0.bias": 1024, + "transformer.layers.4.3.g": 1024, + "transformer.layers.4.4.ff.0.0.weight": 4194304, + "transformer.layers.4.4.ff.0.0.bias": 4096, + "transformer.layers.4.4.ff.2.weight": 4194304, + "transformer.layers.4.4.ff.2.bias": 1024, + "transformer.layers.5.1.g": 1024, + "transformer.layers.5.2.to_q.weight": 1048576, + "transformer.layers.5.2.to_q.bias": 1024, + "transformer.layers.5.2.to_k.weight": 1048576, + "transformer.layers.5.2.to_k.bias": 1024, + "transformer.layers.5.2.to_v.weight": 1048576, + "transformer.layers.5.2.to_v.bias": 1024, + "transformer.layers.5.2.to_out.0.weight": 1048576, + "transformer.layers.5.2.to_out.0.bias": 1024, + "transformer.layers.5.3.g": 1024, + "transformer.layers.5.4.ff.0.0.weight": 4194304, + "transformer.layers.5.4.ff.0.0.bias": 4096, + "transformer.layers.5.4.ff.2.weight": 4194304, + "transformer.layers.5.4.ff.2.bias": 1024, + "transformer.layers.6.1.g": 1024, + "transformer.layers.6.2.to_q.weight": 1048576, + "transformer.layers.6.2.to_q.bias": 1024, + "transformer.layers.6.2.to_k.weight": 1048576, + "transformer.layers.6.2.to_k.bias": 1024, + "transformer.layers.6.2.to_v.weight": 1048576, + "transformer.layers.6.2.to_v.bias": 1024, + "transformer.layers.6.2.to_out.0.weight": 1048576, + "transformer.layers.6.2.to_out.0.bias": 1024, + "transformer.layers.6.3.g": 1024, + "transformer.layers.6.4.ff.0.0.weight": 4194304, + "transformer.layers.6.4.ff.0.0.bias": 4096, + "transformer.layers.6.4.ff.2.weight": 4194304, + "transformer.layers.6.4.ff.2.bias": 1024, + "transformer.layers.7.1.g": 1024, + "transformer.layers.7.2.to_q.weight": 1048576, + "transformer.layers.7.2.to_q.bias": 1024, + "transformer.layers.7.2.to_k.weight": 1048576, + "transformer.layers.7.2.to_k.bias": 1024, + "transformer.layers.7.2.to_v.weight": 1048576, + "transformer.layers.7.2.to_v.bias": 1024, + "transformer.layers.7.2.to_out.0.weight": 1048576, + "transformer.layers.7.2.to_out.0.bias": 1024, + "transformer.layers.7.3.g": 1024, + "transformer.layers.7.4.ff.0.0.weight": 4194304, + "transformer.layers.7.4.ff.0.0.bias": 4096, + "transformer.layers.7.4.ff.2.weight": 4194304, + "transformer.layers.7.4.ff.2.bias": 1024, + "transformer.layers.8.1.g": 1024, + "transformer.layers.8.2.to_q.weight": 1048576, + "transformer.layers.8.2.to_q.bias": 1024, + "transformer.layers.8.2.to_k.weight": 1048576, + "transformer.layers.8.2.to_k.bias": 1024, + "transformer.layers.8.2.to_v.weight": 1048576, + "transformer.layers.8.2.to_v.bias": 1024, + "transformer.layers.8.2.to_out.0.weight": 1048576, + "transformer.layers.8.2.to_out.0.bias": 1024, + "transformer.layers.8.3.g": 1024, + "transformer.layers.8.4.ff.0.0.weight": 4194304, + "transformer.layers.8.4.ff.0.0.bias": 4096, + "transformer.layers.8.4.ff.2.weight": 4194304, + "transformer.layers.8.4.ff.2.bias": 1024, + "transformer.layers.9.1.g": 1024, + "transformer.layers.9.2.to_q.weight": 1048576, + "transformer.layers.9.2.to_q.bias": 1024, + "transformer.layers.9.2.to_k.weight": 1048576, + "transformer.layers.9.2.to_k.bias": 1024, + "transformer.layers.9.2.to_v.weight": 1048576, + "transformer.layers.9.2.to_v.bias": 1024, + "transformer.layers.9.2.to_out.0.weight": 1048576, + "transformer.layers.9.2.to_out.0.bias": 1024, + "transformer.layers.9.3.g": 1024, + "transformer.layers.9.4.ff.0.0.weight": 4194304, + "transformer.layers.9.4.ff.0.0.bias": 4096, + "transformer.layers.9.4.ff.2.weight": 4194304, + "transformer.layers.9.4.ff.2.bias": 1024, + "transformer.layers.10.1.g": 1024, + "transformer.layers.10.2.to_q.weight": 1048576, + "transformer.layers.10.2.to_q.bias": 1024, + "transformer.layers.10.2.to_k.weight": 1048576, + "transformer.layers.10.2.to_k.bias": 1024, + "transformer.layers.10.2.to_v.weight": 1048576, + "transformer.layers.10.2.to_v.bias": 1024, + "transformer.layers.10.2.to_out.0.weight": 1048576, + "transformer.layers.10.2.to_out.0.bias": 1024, + "transformer.layers.10.3.g": 1024, + "transformer.layers.10.4.ff.0.0.weight": 4194304, + "transformer.layers.10.4.ff.0.0.bias": 4096, + "transformer.layers.10.4.ff.2.weight": 4194304, + "transformer.layers.10.4.ff.2.bias": 1024, + "transformer.layers.11.1.g": 1024, + "transformer.layers.11.2.to_q.weight": 1048576, + "transformer.layers.11.2.to_q.bias": 1024, + "transformer.layers.11.2.to_k.weight": 1048576, + "transformer.layers.11.2.to_k.bias": 1024, + "transformer.layers.11.2.to_v.weight": 1048576, + "transformer.layers.11.2.to_v.bias": 1024, + "transformer.layers.11.2.to_out.0.weight": 1048576, + "transformer.layers.11.2.to_out.0.bias": 1024, + "transformer.layers.11.3.g": 1024, + "transformer.layers.11.4.ff.0.0.weight": 4194304, + "transformer.layers.11.4.ff.0.0.bias": 4096, + "transformer.layers.11.4.ff.2.weight": 4194304, + "transformer.layers.11.4.ff.2.bias": 1024, + "transformer.layers.12.1.g": 1024, + "transformer.layers.12.2.to_q.weight": 1048576, + "transformer.layers.12.2.to_q.bias": 1024, + "transformer.layers.12.2.to_k.weight": 1048576, + "transformer.layers.12.2.to_k.bias": 1024, + "transformer.layers.12.2.to_v.weight": 1048576, + "transformer.layers.12.2.to_v.bias": 1024, + "transformer.layers.12.2.to_out.0.weight": 1048576, + "transformer.layers.12.2.to_out.0.bias": 1024, + "transformer.layers.12.3.g": 1024, + "transformer.layers.12.4.ff.0.0.weight": 4194304, + "transformer.layers.12.4.ff.0.0.bias": 4096, + "transformer.layers.12.4.ff.2.weight": 4194304, + "transformer.layers.12.4.ff.2.bias": 1024, + "transformer.layers.13.0.weight": 2097152, + "transformer.layers.13.1.g": 1024, + "transformer.layers.13.2.to_q.weight": 1048576, + "transformer.layers.13.2.to_q.bias": 1024, + "transformer.layers.13.2.to_k.weight": 1048576, + "transformer.layers.13.2.to_k.bias": 1024, + "transformer.layers.13.2.to_v.weight": 1048576, + "transformer.layers.13.2.to_v.bias": 1024, + "transformer.layers.13.2.to_out.0.weight": 1048576, + "transformer.layers.13.2.to_out.0.bias": 1024, + "transformer.layers.13.3.g": 1024, + "transformer.layers.13.4.ff.0.0.weight": 4194304, + "transformer.layers.13.4.ff.0.0.bias": 4096, + "transformer.layers.13.4.ff.2.weight": 4194304, + "transformer.layers.13.4.ff.2.bias": 1024, + "transformer.layers.14.0.weight": 2097152, + "transformer.layers.14.1.g": 1024, + "transformer.layers.14.2.to_q.weight": 1048576, + "transformer.layers.14.2.to_q.bias": 1024, + "transformer.layers.14.2.to_k.weight": 1048576, + "transformer.layers.14.2.to_k.bias": 1024, + "transformer.layers.14.2.to_v.weight": 1048576, + "transformer.layers.14.2.to_v.bias": 1024, + "transformer.layers.14.2.to_out.0.weight": 1048576, + "transformer.layers.14.2.to_out.0.bias": 1024, + "transformer.layers.14.3.g": 1024, + "transformer.layers.14.4.ff.0.0.weight": 4194304, + "transformer.layers.14.4.ff.0.0.bias": 4096, + "transformer.layers.14.4.ff.2.weight": 4194304, + "transformer.layers.14.4.ff.2.bias": 1024, + "transformer.layers.15.0.weight": 2097152, + "transformer.layers.15.1.g": 1024, + "transformer.layers.15.2.to_q.weight": 1048576, + "transformer.layers.15.2.to_q.bias": 1024, + "transformer.layers.15.2.to_k.weight": 1048576, + "transformer.layers.15.2.to_k.bias": 1024, + "transformer.layers.15.2.to_v.weight": 1048576, + "transformer.layers.15.2.to_v.bias": 1024, + "transformer.layers.15.2.to_out.0.weight": 1048576, + "transformer.layers.15.2.to_out.0.bias": 1024, + "transformer.layers.15.3.g": 1024, + "transformer.layers.15.4.ff.0.0.weight": 4194304, + "transformer.layers.15.4.ff.0.0.bias": 4096, + "transformer.layers.15.4.ff.2.weight": 4194304, + "transformer.layers.15.4.ff.2.bias": 1024, + "transformer.layers.16.0.weight": 2097152, + "transformer.layers.16.1.g": 1024, + "transformer.layers.16.2.to_q.weight": 1048576, + "transformer.layers.16.2.to_q.bias": 1024, + "transformer.layers.16.2.to_k.weight": 1048576, + "transformer.layers.16.2.to_k.bias": 1024, + "transformer.layers.16.2.to_v.weight": 1048576, + "transformer.layers.16.2.to_v.bias": 1024, + "transformer.layers.16.2.to_out.0.weight": 1048576, + "transformer.layers.16.2.to_out.0.bias": 1024, + "transformer.layers.16.3.g": 1024, + "transformer.layers.16.4.ff.0.0.weight": 4194304, + "transformer.layers.16.4.ff.0.0.bias": 4096, + "transformer.layers.16.4.ff.2.weight": 4194304, + "transformer.layers.16.4.ff.2.bias": 1024, + "transformer.layers.17.0.weight": 2097152, + "transformer.layers.17.1.g": 1024, + "transformer.layers.17.2.to_q.weight": 1048576, + "transformer.layers.17.2.to_q.bias": 1024, + "transformer.layers.17.2.to_k.weight": 1048576, + "transformer.layers.17.2.to_k.bias": 1024, + "transformer.layers.17.2.to_v.weight": 1048576, + "transformer.layers.17.2.to_v.bias": 1024, + "transformer.layers.17.2.to_out.0.weight": 1048576, + "transformer.layers.17.2.to_out.0.bias": 1024, + "transformer.layers.17.3.g": 1024, + "transformer.layers.17.4.ff.0.0.weight": 4194304, + "transformer.layers.17.4.ff.0.0.bias": 4096, + "transformer.layers.17.4.ff.2.weight": 4194304, + "transformer.layers.17.4.ff.2.bias": 1024, + "transformer.layers.18.0.weight": 2097152, + "transformer.layers.18.1.g": 1024, + "transformer.layers.18.2.to_q.weight": 1048576, + "transformer.layers.18.2.to_q.bias": 1024, + "transformer.layers.18.2.to_k.weight": 1048576, + "transformer.layers.18.2.to_k.bias": 1024, + "transformer.layers.18.2.to_v.weight": 1048576, + "transformer.layers.18.2.to_v.bias": 1024, + "transformer.layers.18.2.to_out.0.weight": 1048576, + "transformer.layers.18.2.to_out.0.bias": 1024, + "transformer.layers.18.3.g": 1024, + "transformer.layers.18.4.ff.0.0.weight": 4194304, + "transformer.layers.18.4.ff.0.0.bias": 4096, + "transformer.layers.18.4.ff.2.weight": 4194304, + "transformer.layers.18.4.ff.2.bias": 1024, + "transformer.layers.19.0.weight": 2097152, + "transformer.layers.19.1.g": 1024, + "transformer.layers.19.2.to_q.weight": 1048576, + "transformer.layers.19.2.to_q.bias": 1024, + "transformer.layers.19.2.to_k.weight": 1048576, + "transformer.layers.19.2.to_k.bias": 1024, + "transformer.layers.19.2.to_v.weight": 1048576, + "transformer.layers.19.2.to_v.bias": 1024, + "transformer.layers.19.2.to_out.0.weight": 1048576, + "transformer.layers.19.2.to_out.0.bias": 1024, + "transformer.layers.19.3.g": 1024, + "transformer.layers.19.4.ff.0.0.weight": 4194304, + "transformer.layers.19.4.ff.0.0.bias": 4096, + "transformer.layers.19.4.ff.2.weight": 4194304, + "transformer.layers.19.4.ff.2.bias": 1024, + "transformer.layers.20.0.weight": 2097152, + "transformer.layers.20.1.g": 1024, + "transformer.layers.20.2.to_q.weight": 1048576, + "transformer.layers.20.2.to_q.bias": 1024, + "transformer.layers.20.2.to_k.weight": 1048576, + "transformer.layers.20.2.to_k.bias": 1024, + "transformer.layers.20.2.to_v.weight": 1048576, + "transformer.layers.20.2.to_v.bias": 1024, + "transformer.layers.20.2.to_out.0.weight": 1048576, + "transformer.layers.20.2.to_out.0.bias": 1024, + "transformer.layers.20.3.g": 1024, + "transformer.layers.20.4.ff.0.0.weight": 4194304, + "transformer.layers.20.4.ff.0.0.bias": 4096, + "transformer.layers.20.4.ff.2.weight": 4194304, + "transformer.layers.20.4.ff.2.bias": 1024, + "transformer.layers.21.0.weight": 2097152, + "transformer.layers.21.1.g": 1024, + "transformer.layers.21.2.to_q.weight": 1048576, + "transformer.layers.21.2.to_q.bias": 1024, + "transformer.layers.21.2.to_k.weight": 1048576, + "transformer.layers.21.2.to_k.bias": 1024, + "transformer.layers.21.2.to_v.weight": 1048576, + "transformer.layers.21.2.to_v.bias": 1024, + "transformer.layers.21.2.to_out.0.weight": 1048576, + "transformer.layers.21.2.to_out.0.bias": 1024, + "transformer.layers.21.3.g": 1024, + "transformer.layers.21.4.ff.0.0.weight": 4194304, + "transformer.layers.21.4.ff.0.0.bias": 4096, + "transformer.layers.21.4.ff.2.weight": 4194304, + "transformer.layers.21.4.ff.2.bias": 1024, + "transformer.layers.22.0.weight": 2097152, + "transformer.layers.22.1.g": 1024, + "transformer.layers.22.2.to_q.weight": 1048576, + "transformer.layers.22.2.to_q.bias": 1024, + "transformer.layers.22.2.to_k.weight": 1048576, + "transformer.layers.22.2.to_k.bias": 1024, + "transformer.layers.22.2.to_v.weight": 1048576, + "transformer.layers.22.2.to_v.bias": 1024, + "transformer.layers.22.2.to_out.0.weight": 1048576, + "transformer.layers.22.2.to_out.0.bias": 1024, + "transformer.layers.22.3.g": 1024, + "transformer.layers.22.4.ff.0.0.weight": 4194304, + "transformer.layers.22.4.ff.0.0.bias": 4096, + "transformer.layers.22.4.ff.2.weight": 4194304, + "transformer.layers.22.4.ff.2.bias": 1024, + "transformer.layers.23.0.weight": 2097152, + "transformer.layers.23.1.g": 1024, + "transformer.layers.23.2.to_q.weight": 1048576, + "transformer.layers.23.2.to_q.bias": 1024, + "transformer.layers.23.2.to_k.weight": 1048576, + "transformer.layers.23.2.to_k.bias": 1024, + "transformer.layers.23.2.to_v.weight": 1048576, + "transformer.layers.23.2.to_v.bias": 1024, + "transformer.layers.23.2.to_out.0.weight": 1048576, + "transformer.layers.23.2.to_out.0.bias": 1024, + "transformer.layers.23.3.g": 1024, + "transformer.layers.23.4.ff.0.0.weight": 4194304, + "transformer.layers.23.4.ff.0.0.bias": 4096, + "transformer.layers.23.4.ff.2.weight": 4194304, + "transformer.layers.23.4.ff.2.bias": 1024, + "transformer.layers.24.0.weight": 2097152, + "transformer.layers.24.1.g": 1024, + "transformer.layers.24.2.to_q.weight": 1048576, + "transformer.layers.24.2.to_q.bias": 1024, + "transformer.layers.24.2.to_k.weight": 1048576, + "transformer.layers.24.2.to_k.bias": 1024, + "transformer.layers.24.2.to_v.weight": 1048576, + "transformer.layers.24.2.to_v.bias": 1024, + "transformer.layers.24.2.to_out.0.weight": 1048576, + "transformer.layers.24.2.to_out.0.bias": 1024, + "transformer.layers.24.3.g": 1024, + "transformer.layers.24.4.ff.0.0.weight": 4194304, + "transformer.layers.24.4.ff.0.0.bias": 4096, + "transformer.layers.24.4.ff.2.weight": 4194304, + "transformer.layers.24.4.ff.2.bias": 1024, + "transformer.layers.25.0.weight": 2097152, + "transformer.layers.25.1.g": 1024, + "transformer.layers.25.2.to_q.weight": 1048576, + "transformer.layers.25.2.to_q.bias": 1024, + "transformer.layers.25.2.to_k.weight": 1048576, + "transformer.layers.25.2.to_k.bias": 1024, + "transformer.layers.25.2.to_v.weight": 1048576, + "transformer.layers.25.2.to_v.bias": 1024, + "transformer.layers.25.2.to_out.0.weight": 1048576, + "transformer.layers.25.2.to_out.0.bias": 1024, + "transformer.layers.25.3.g": 1024, + "transformer.layers.25.4.ff.0.0.weight": 4194304, + "transformer.layers.25.4.ff.0.0.bias": 4096, + "transformer.layers.25.4.ff.2.weight": 4194304, + "transformer.layers.25.4.ff.2.bias": 1024, + "transformer.norm_out.g": 1024, + "transformer.proj_out.weight": 102400, + "transformer.proj_out.bias": 100 + }, + "important_layers": [ + "transformer.time_embed.time_mlp.0.weight", + "transformer.time_embed.time_mlp.2.weight", + "transformer.text_embed.text_embed.weight", + "transformer.input_embed.proj.weight", + "transformer.input_embed.conv_pos_embed.conv1d.0.weight", + "transformer.input_embed.conv_pos_embed.conv1d.2.weight", + "transformer.layers.0.2.to_q.weight", + "transformer.layers.0.2.to_k.weight", + "transformer.layers.0.2.to_v.weight", + "transformer.layers.0.2.to_out.0.weight", + "transformer.layers.0.4.ff.0.0.weight", + "transformer.layers.0.4.ff.2.weight", + "transformer.layers.1.2.to_q.weight", + "transformer.layers.1.2.to_k.weight", + "transformer.layers.1.2.to_v.weight", + "transformer.layers.1.2.to_out.0.weight", + "transformer.layers.1.4.ff.0.0.weight", + "transformer.layers.1.4.ff.2.weight", + "transformer.layers.2.2.to_q.weight", + "transformer.layers.2.2.to_k.weight", + "transformer.layers.2.2.to_v.weight", + "transformer.layers.2.2.to_out.0.weight", + "transformer.layers.2.4.ff.0.0.weight", + "transformer.layers.2.4.ff.2.weight", + "transformer.layers.3.2.to_q.weight", + "transformer.layers.3.2.to_k.weight", + "transformer.layers.3.2.to_v.weight", + "transformer.layers.3.2.to_out.0.weight", + "transformer.layers.3.4.ff.0.0.weight", + "transformer.layers.3.4.ff.2.weight", + "transformer.layers.4.2.to_q.weight", + "transformer.layers.4.2.to_k.weight", + "transformer.layers.4.2.to_v.weight", + "transformer.layers.4.2.to_out.0.weight", + "transformer.layers.4.4.ff.0.0.weight", + "transformer.layers.4.4.ff.2.weight", + "transformer.layers.5.2.to_q.weight", + "transformer.layers.5.2.to_k.weight", + "transformer.layers.5.2.to_v.weight", + "transformer.layers.5.2.to_out.0.weight", + "transformer.layers.5.4.ff.0.0.weight", + "transformer.layers.5.4.ff.2.weight", + "transformer.layers.6.2.to_q.weight", + "transformer.layers.6.2.to_k.weight", + "transformer.layers.6.2.to_v.weight", + "transformer.layers.6.2.to_out.0.weight", + "transformer.layers.6.4.ff.0.0.weight", + "transformer.layers.6.4.ff.2.weight", + "transformer.layers.7.2.to_q.weight", + "transformer.layers.7.2.to_k.weight", + "transformer.layers.7.2.to_v.weight", + "transformer.layers.7.2.to_out.0.weight", + "transformer.layers.7.4.ff.0.0.weight", + "transformer.layers.7.4.ff.2.weight", + "transformer.layers.8.4.ff.0.0.weight", + "transformer.layers.8.4.ff.2.weight", + "transformer.layers.9.4.ff.0.0.weight", + "transformer.layers.9.4.ff.2.weight", + "transformer.layers.10.4.ff.0.0.weight", + "transformer.layers.10.4.ff.2.weight", + "transformer.layers.11.4.ff.0.0.weight", + "transformer.layers.11.4.ff.2.weight", + "transformer.layers.12.4.ff.0.0.weight", + "transformer.layers.12.4.ff.2.weight", + "transformer.layers.13.0.weight", + "transformer.layers.13.4.ff.0.0.weight", + "transformer.layers.13.4.ff.2.weight", + "transformer.layers.14.0.weight", + "transformer.layers.14.4.ff.0.0.weight", + "transformer.layers.14.4.ff.2.weight", + "transformer.layers.15.0.weight", + "transformer.layers.15.4.ff.0.0.weight", + "transformer.layers.15.4.ff.2.weight", + "transformer.layers.16.4.ff.0.0.weight", + "transformer.layers.16.4.ff.2.weight", + "transformer.layers.17.4.ff.0.0.weight", + "transformer.layers.17.4.ff.2.weight", + "transformer.layers.18.4.ff.0.0.weight", + "transformer.layers.18.4.ff.2.weight", + "transformer.layers.19.4.ff.0.0.weight", + "transformer.layers.19.4.ff.2.weight", + "transformer.layers.20.4.ff.0.0.weight", + "transformer.layers.20.4.ff.2.weight", + "transformer.layers.21.4.ff.0.0.weight", + "transformer.layers.21.4.ff.2.weight", + "transformer.layers.22.4.ff.0.0.weight", + "transformer.layers.22.4.ff.2.weight", + "transformer.layers.23.4.ff.0.0.weight", + "transformer.layers.23.4.ff.2.weight", + "transformer.layers.24.4.ff.0.0.weight", + "transformer.layers.24.4.ff.2.weight", + "transformer.layers.25.4.ff.0.0.weight", + "transformer.layers.25.4.ff.2.weight" + ], + "bottleneck_layers": [], + "recommendations": { + "focus_layers": [ + "transformer.time_embed.time_mlp.0.weight", + "transformer.time_embed.time_mlp.2.weight", + "transformer.text_embed.text_embed.weight", + "transformer.input_embed.proj.weight", + "transformer.input_embed.conv_pos_embed.conv1d.0.weight", + "transformer.input_embed.conv_pos_embed.conv1d.2.weight", + "transformer.layers.0.2.to_q.weight", + "transformer.layers.0.2.to_k.weight", + "transformer.layers.0.2.to_v.weight", + "transformer.layers.0.2.to_out.0.weight", + "transformer.layers.0.4.ff.0.0.weight", + "transformer.layers.0.4.ff.2.weight", + "transformer.layers.1.2.to_q.weight", + "transformer.layers.1.2.to_k.weight", + "transformer.layers.1.2.to_v.weight", + "transformer.layers.1.2.to_out.0.weight", + "transformer.layers.1.4.ff.0.0.weight", + "transformer.layers.1.4.ff.2.weight", + "transformer.layers.2.2.to_q.weight", + "transformer.layers.2.2.to_k.weight", + "transformer.layers.2.2.to_v.weight", + "transformer.layers.2.2.to_out.0.weight", + "transformer.layers.2.4.ff.0.0.weight", + "transformer.layers.2.4.ff.2.weight", + "transformer.layers.3.2.to_q.weight", + "transformer.layers.3.2.to_k.weight", + "transformer.layers.3.2.to_v.weight", + "transformer.layers.3.2.to_out.0.weight", + "transformer.layers.3.4.ff.0.0.weight", + "transformer.layers.3.4.ff.2.weight", + "transformer.layers.4.2.to_q.weight", + "transformer.layers.4.2.to_k.weight", + "transformer.layers.4.2.to_v.weight", + "transformer.layers.4.2.to_out.0.weight", + "transformer.layers.4.4.ff.0.0.weight", + "transformer.layers.4.4.ff.2.weight", + "transformer.layers.5.2.to_q.weight", + "transformer.layers.5.2.to_k.weight", + "transformer.layers.5.2.to_v.weight", + "transformer.layers.5.2.to_out.0.weight", + "transformer.layers.5.4.ff.0.0.weight", + "transformer.layers.5.4.ff.2.weight", + "transformer.layers.6.2.to_q.weight", + "transformer.layers.6.2.to_k.weight", + "transformer.layers.6.2.to_v.weight", + "transformer.layers.6.2.to_out.0.weight", + "transformer.layers.6.4.ff.0.0.weight", + "transformer.layers.6.4.ff.2.weight", + "transformer.layers.7.2.to_q.weight", + "transformer.layers.7.2.to_k.weight", + "transformer.layers.7.2.to_v.weight", + "transformer.layers.7.2.to_out.0.weight", + "transformer.layers.7.4.ff.0.0.weight", + "transformer.layers.7.4.ff.2.weight", + "transformer.layers.8.4.ff.0.0.weight", + "transformer.layers.8.4.ff.2.weight", + "transformer.layers.9.4.ff.0.0.weight", + "transformer.layers.9.4.ff.2.weight", + "transformer.layers.10.4.ff.0.0.weight", + "transformer.layers.10.4.ff.2.weight", + "transformer.layers.11.4.ff.0.0.weight", + "transformer.layers.11.4.ff.2.weight", + "transformer.layers.12.4.ff.0.0.weight", + "transformer.layers.12.4.ff.2.weight", + "transformer.layers.13.0.weight", + "transformer.layers.13.4.ff.0.0.weight", + "transformer.layers.13.4.ff.2.weight", + "transformer.layers.14.0.weight", + "transformer.layers.14.4.ff.0.0.weight", + "transformer.layers.14.4.ff.2.weight", + "transformer.layers.15.0.weight", + "transformer.layers.15.4.ff.0.0.weight", + "transformer.layers.15.4.ff.2.weight", + "transformer.layers.16.4.ff.0.0.weight", + "transformer.layers.16.4.ff.2.weight", + "transformer.layers.17.4.ff.0.0.weight", + "transformer.layers.17.4.ff.2.weight", + "transformer.layers.18.4.ff.0.0.weight", + "transformer.layers.18.4.ff.2.weight", + "transformer.layers.19.4.ff.0.0.weight", + "transformer.layers.19.4.ff.2.weight", + "transformer.layers.20.4.ff.0.0.weight", + "transformer.layers.20.4.ff.2.weight", + "transformer.layers.21.4.ff.0.0.weight", + "transformer.layers.21.4.ff.2.weight", + "transformer.layers.22.4.ff.0.0.weight", + "transformer.layers.22.4.ff.2.weight", + "transformer.layers.23.4.ff.0.0.weight", + "transformer.layers.23.4.ff.2.weight", + "transformer.layers.24.4.ff.0.0.weight", + "transformer.layers.24.4.ff.2.weight", + "transformer.layers.25.4.ff.0.0.weight", + "transformer.layers.25.4.ff.2.weight" + ] + }, + "total_parameters": 391, + "total_elements": 360755948, + "param_ranges": { + "transformer.time_embed.time_mlp.0.weight": { + "min": -0.430247962474823, + "max": 0.29814788699150085, + "mean": -0.0025456156581640244, + "std": 0.042562179267406464, + "sparsity": 0.0, + "shape": [ + 1024, + 256 + ] + }, + "transformer.time_embed.time_mlp.0.bias": { + "min": -0.06305033713579178, + "max": 0.10756707191467285, + "mean": 0.0006329622119665146, + "std": 0.03406817466020584, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.time_embed.time_mlp.2.weight": { + "min": -0.4126828908920288, + "max": 0.8368642926216125, + "mean": -0.00020196933473926038, + "std": 0.024113450199365616, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.time_embed.time_mlp.2.bias": { + "min": -0.11526867002248764, + "max": 0.3216077983379364, + "mean": -0.0009404964512214065, + "std": 0.019565371796488762, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.text_embed.text_embed.weight": { + "min": -2.7922351360321045, + "max": 2.8709537982940674, + "mean": -0.0003647372650448233, + "std": 0.6154845356941223, + "sparsity": 0.0, + "shape": [ + 2546, + 100 + ] + }, + "transformer.input_embed.proj.weight": { + "min": -0.27921348810195923, + "max": 0.38164129853248596, + "mean": 0.0004232236242387444, + "std": 0.04274886101484299, + "sparsity": 0.0, + "shape": [ + 1024, + 300 + ] + }, + "transformer.input_embed.proj.bias": { + "min": -0.2224942147731781, + "max": 0.20972047746181488, + "mean": -0.004487486090511084, + "std": 0.040916070342063904, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.input_embed.conv_pos_embed.conv1d.0.weight": { + "min": -0.4284340739250183, + "max": 0.47617435455322266, + "mean": 3.322187239973573e-06, + "std": 0.024511422961950302, + "sparsity": 0.0, + "shape": [ + 1024, + 64, + 31 + ] + }, + "transformer.input_embed.conv_pos_embed.conv1d.0.bias": { + "min": -0.32528114318847656, + "max": 0.15677402913570404, + "mean": -0.04670446366071701, + "std": 0.051589105278253555, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.input_embed.conv_pos_embed.conv1d.2.weight": { + "min": -0.41054657101631165, + "max": 0.3546879291534424, + "mean": -0.00012705953849945217, + "std": 0.023604456335306168, + "sparsity": 0.0, + "shape": [ + 1024, + 64, + 31 + ] + }, + "transformer.input_embed.conv_pos_embed.conv1d.2.bias": { + "min": -0.22982755303382874, + "max": 0.26271378993988037, + "mean": -0.029137738049030304, + "std": 0.049353621900081635, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.0.1.g": { + "min": 0.25457319617271423, + "max": 0.8201438188552856, + "mean": 0.5254908800125122, + "std": 0.08082503080368042, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.0.2.to_q.weight": { + "min": -0.29710477590560913, + "max": 0.26579147577285767, + "mean": -0.0004257034743204713, + "std": 0.03210267424583435, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.0.2.to_q.bias": { + "min": -0.09286229312419891, + "max": 0.12479868531227112, + "mean": 0.0006487525533884764, + "std": 0.025735046714544296, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.0.2.to_k.weight": { + "min": -0.290811687707901, + "max": 0.2813718020915985, + "mean": -7.56493245717138e-05, + "std": 0.030931707471609116, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.0.2.to_k.bias": { + "min": -5.900395393371582, + "max": 5.815171718597412, + "mean": -0.009333105757832527, + "std": 1.295695185661316, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.0.2.to_v.weight": { + "min": -0.4251435399055481, + "max": 0.3437366187572479, + "mean": 9.79713149718009e-05, + "std": 0.02995358221232891, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.0.2.to_v.bias": { + "min": -0.028972996398806572, + "max": 0.027724435552954674, + "mean": -0.00031865754863247275, + "std": 0.012574296444654465, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.0.2.to_out.0.weight": { + "min": -0.45405057072639465, + "max": 0.44834038615226746, + "mean": 2.372298331465572e-05, + "std": 0.02385387383401394, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.0.2.to_out.0.bias": { + "min": -0.08870794624090195, + "max": 0.09110292047262192, + "mean": 0.0022859524469822645, + "std": 0.01951485686004162, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.0.3.g": { + "min": 0.26681551337242126, + "max": 1.056317687034607, + "mean": 0.5312033891677856, + "std": 0.10443911701440811, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.0.4.ff.0.0.weight": { + "min": -0.5745526552200317, + "max": 0.6082873940467834, + "mean": -0.00043126955279149115, + "std": 0.03860025480389595, + "sparsity": 0.0, + "shape": [ + 4096, + 1024 + ] + }, + "transformer.layers.0.4.ff.0.0.bias": { + "min": -0.18273141980171204, + "max": 0.04556818678975105, + "mean": -0.029461650177836418, + "std": 0.042611170560121536, + "sparsity": 0.0, + "shape": [ + 4096 + ] + }, + "transformer.layers.0.4.ff.2.weight": { + "min": -1.1671894788742065, + "max": 1.6339271068572998, + "mean": 0.0003239789803046733, + "std": 0.027696946635842323, + "sparsity": 0.0, + "shape": [ + 1024, + 4096 + ] + }, + "transformer.layers.0.4.ff.2.bias": { + "min": -0.16238771378993988, + "max": 0.20571960508823395, + "mean": -0.021131085231900215, + "std": 0.02794588916003704, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.1.1.g": { + "min": 0.22399598360061646, + "max": 0.8438678979873657, + "mean": 0.48765647411346436, + "std": 0.07522650808095932, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.1.2.to_q.weight": { + "min": -0.2555526793003082, + "max": 0.305812269449234, + "mean": -6.7934306571260095e-06, + "std": 0.03347478806972504, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.1.2.to_q.bias": { + "min": -0.09538023918867111, + "max": 0.11050069332122803, + "mean": 6.53832103125751e-05, + "std": 0.02696637623012066, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.1.2.to_k.weight": { + "min": -0.297147661447525, + "max": 0.2961280345916748, + "mean": 5.286935265758075e-05, + "std": 0.032545968890190125, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.1.2.to_k.bias": { + "min": -5.165225028991699, + "max": 5.085448741912842, + "mean": -0.014597500674426556, + "std": 1.1575955152511597, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.1.2.to_v.weight": { + "min": -0.3449052572250366, + "max": 0.34331217408180237, + "mean": 7.911311695352197e-05, + "std": 0.03006201609969139, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.1.2.to_v.bias": { + "min": -0.03610210865736008, + "max": 0.03328812122344971, + "mean": -0.0001417656458215788, + "std": 0.01303204894065857, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.1.2.to_out.0.weight": { + "min": -0.3154510259628296, + "max": 0.37501609325408936, + "mean": -2.077353019558359e-05, + "std": 0.024059347808361053, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.1.2.to_out.0.bias": { + "min": -0.10547598451375961, + "max": 0.1221047043800354, + "mean": -0.0019677607342600822, + "std": 0.028854791074991226, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.1.3.g": { + "min": 0.31151488423347473, + "max": 1.1208997964859009, + "mean": 0.6663015484809875, + "std": 0.09774678200483322, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.1.4.ff.0.0.weight": { + "min": -0.8727833032608032, + "max": 0.6275414824485779, + "mean": 0.001675266888923943, + "std": 0.04743880406022072, + "sparsity": 0.0, + "shape": [ + 4096, + 1024 + ] + }, + "transformer.layers.1.4.ff.0.0.bias": { + "min": -0.2714674770832062, + "max": 0.03427550569176674, + "mean": -0.04661353677511215, + "std": 0.040598493069410324, + "sparsity": 0.0, + "shape": [ + 4096 + ] + }, + "transformer.layers.1.4.ff.2.weight": { + "min": -0.9226045608520508, + "max": 0.9647504687309265, + "mean": 0.0010200842516496778, + "std": 0.040706485509872437, + "sparsity": 0.0, + "shape": [ + 1024, + 4096 + ] + }, + "transformer.layers.1.4.ff.2.bias": { + "min": -0.1445719450712204, + "max": 0.07502147555351257, + "mean": -0.009089105762541294, + "std": 0.025694996118545532, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.2.1.g": { + "min": 0.24015086889266968, + "max": 0.7130303978919983, + "mean": 0.4472612142562866, + "std": 0.05932846665382385, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.2.2.to_q.weight": { + "min": -0.27250105142593384, + "max": 0.29779112339019775, + "mean": 9.235942343366332e-06, + "std": 0.03546915203332901, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.2.2.to_q.bias": { + "min": -0.1193777546286583, + "max": 0.11857955157756805, + "mean": 0.0007589810993522406, + "std": 0.02763049118220806, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.2.2.to_k.weight": { + "min": -0.28105634450912476, + "max": 0.2798849046230316, + "mean": -7.697378896409646e-05, + "std": 0.0350995697081089, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.2.2.to_k.bias": { + "min": -2.5100622177124023, + "max": 2.5220582485198975, + "mean": 0.02675231173634529, + "std": 0.5868890285491943, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.2.2.to_v.weight": { + "min": -0.2211104929447174, + "max": 0.27162447571754456, + "mean": 2.60172691923799e-06, + "std": 0.030733274295926094, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.2.2.to_v.bias": { + "min": -0.033548399806022644, + "max": 0.03133385255932808, + "mean": 0.00011904191342182457, + "std": 0.012407796457409859, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.2.2.to_out.0.weight": { + "min": -0.23527584969997406, + "max": 0.23167696595191956, + "mean": 5.708727621822618e-05, + "std": 0.025696981698274612, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.2.2.to_out.0.bias": { + "min": -0.13586905598640442, + "max": 0.12758414447307587, + "mean": -0.0054936036467552185, + "std": 0.039962876588106155, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.2.3.g": { + "min": 0.35451188683509827, + "max": 1.1720999479293823, + "mean": 0.710637629032135, + "std": 0.10376914590597153, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.2.4.ff.0.0.weight": { + "min": -0.6174948811531067, + "max": 0.5544577240943909, + "mean": 0.0011600415455177426, + "std": 0.04611966758966446, + "sparsity": 0.0, + "shape": [ + 4096, + 1024 + ] + }, + "transformer.layers.2.4.ff.0.0.bias": { + "min": -0.1883939653635025, + "max": 0.02492486871778965, + "mean": -0.03484141081571579, + "std": 0.028610829263925552, + "sparsity": 0.0, + "shape": [ + 4096 + ] + }, + "transformer.layers.2.4.ff.2.weight": { + "min": -1.131612777709961, + "max": 0.9714275002479553, + "mean": 0.00035819801269099116, + "std": 0.04234758019447327, + "sparsity": 0.0, + "shape": [ + 1024, + 4096 + ] + }, + "transformer.layers.2.4.ff.2.bias": { + "min": -0.5980822443962097, + "max": 0.06284141540527344, + "mean": -0.004877430386841297, + "std": 0.028617603704333305, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.3.1.g": { + "min": 0.37526264786720276, + "max": 0.9405426383018494, + "mean": 0.5925549268722534, + "std": 0.0669507160782814, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.3.2.to_q.weight": { + "min": -0.39145711064338684, + "max": 0.3691279888153076, + "mean": 7.120549707906321e-05, + "std": 0.03718876466155052, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.3.2.to_q.bias": { + "min": -0.11895960569381714, + "max": 0.13652607798576355, + "mean": 0.0009289687732234597, + "std": 0.029236802831292152, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.3.2.to_k.weight": { + "min": -0.619219183921814, + "max": 0.5088949203491211, + "mean": 1.4944693248253316e-05, + "std": 0.036442093551158905, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.3.2.to_k.bias": { + "min": -8.188663482666016, + "max": 8.790773391723633, + "mean": -0.10929473489522934, + "std": 1.6991605758666992, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.3.2.to_v.weight": { + "min": -0.2766683101654053, + "max": 0.23983481526374817, + "mean": 5.299611802911386e-05, + "std": 0.032615721225738525, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.3.2.to_v.bias": { + "min": -0.052095651626586914, + "max": 0.039515361189842224, + "mean": 9.424134623259306e-05, + "std": 0.012960628606379032, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.3.2.to_out.0.weight": { + "min": -0.23076868057250977, + "max": 0.234751895070076, + "mean": -2.1736430426244624e-05, + "std": 0.029392007738351822, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.3.2.to_out.0.bias": { + "min": -0.20435833930969238, + "max": 0.10555171221494675, + "mean": -0.004022371023893356, + "std": 0.03262435272336006, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.3.3.g": { + "min": 0.33977094292640686, + "max": 1.0126755237579346, + "mean": 0.7008676528930664, + "std": 0.0967569425702095, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.3.4.ff.0.0.weight": { + "min": -0.5649488568305969, + "max": 0.8331477046012878, + "mean": 0.00041524306288920343, + "std": 0.04230210557579994, + "sparsity": 0.0, + "shape": [ + 4096, + 1024 + ] + }, + "transformer.layers.3.4.ff.0.0.bias": { + "min": -0.21171551942825317, + "max": 0.030433084815740585, + "mean": -0.03218771517276764, + "std": 0.026509009301662445, + "sparsity": 0.0, + "shape": [ + 4096 + ] + }, + "transformer.layers.3.4.ff.2.weight": { + "min": -0.7544965744018555, + "max": 0.7186921834945679, + "mean": -1.2556927686091512e-05, + "std": 0.036842044442892075, + "sparsity": 0.0, + "shape": [ + 1024, + 4096 + ] + }, + "transformer.layers.3.4.ff.2.bias": { + "min": -0.26356518268585205, + "max": 0.10585562884807587, + "mean": -0.003026221413165331, + "std": 0.028868772089481354, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.4.1.g": { + "min": 0.28427132964134216, + "max": 0.6951562762260437, + "mean": 0.4995492994785309, + "std": 0.046537742018699646, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.4.2.to_q.weight": { + "min": -0.27920955419540405, + "max": 0.23424308001995087, + "mean": -0.00011120487761218101, + "std": 0.038762450218200684, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.4.2.to_q.bias": { + "min": -0.15435229241847992, + "max": 0.126743882894516, + "mean": -0.002232551807537675, + "std": 0.03338867425918579, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.4.2.to_k.weight": { + "min": -0.41404443979263306, + "max": 0.6600516438484192, + "mean": -1.9756593246711418e-05, + "std": 0.03909948095679283, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.4.2.to_k.bias": { + "min": -4.238841533660889, + "max": 4.723404884338379, + "mean": -0.02046278491616249, + "std": 1.0078744888305664, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.4.2.to_v.weight": { + "min": -0.24500444531440735, + "max": 0.20759114623069763, + "mean": 4.401802652864717e-05, + "std": 0.03396647423505783, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.4.2.to_v.bias": { + "min": -0.03457580879330635, + "max": 0.04486193135380745, + "mean": -1.914246240630746e-05, + "std": 0.012628658674657345, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.4.2.to_out.0.weight": { + "min": -0.20080946385860443, + "max": 0.20593363046646118, + "mean": -2.9703282052651048e-05, + "std": 0.03102399967610836, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.4.2.to_out.0.bias": { + "min": -0.20000168681144714, + "max": 0.11336001008749008, + "mean": -0.002912652213126421, + "std": 0.03451835736632347, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.4.3.g": { + "min": 0.3670476973056793, + "max": 1.0570876598358154, + "mean": 0.6706215143203735, + "std": 0.06639451533555984, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.4.4.ff.0.0.weight": { + "min": -0.39835721254348755, + "max": 0.5023353695869446, + "mean": -3.849938002531417e-05, + "std": 0.0411369614303112, + "sparsity": 0.0, + "shape": [ + 4096, + 1024 + ] + }, + "transformer.layers.4.4.ff.0.0.bias": { + "min": -0.12806333601474762, + "max": 0.026793837547302246, + "mean": -0.030542662367224693, + "std": 0.021876059472560883, + "sparsity": 0.0, + "shape": [ + 4096 + ] + }, + "transformer.layers.4.4.ff.2.weight": { + "min": -0.4490928053855896, + "max": 0.4329548478126526, + "mean": 7.997997454367578e-05, + "std": 0.03489622473716736, + "sparsity": 0.0, + "shape": [ + 1024, + 4096 + ] + }, + "transformer.layers.4.4.ff.2.bias": { + "min": -0.2676912248134613, + "max": 0.07277432084083557, + "mean": -0.0011054163333028555, + "std": 0.023129144683480263, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.5.1.g": { + "min": 0.28743863105773926, + "max": 0.6852545738220215, + "mean": 0.5245908498764038, + "std": 0.047539178282022476, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.5.2.to_q.weight": { + "min": -0.22235621511936188, + "max": 0.2234710454940796, + "mean": 1.5755222193547525e-05, + "std": 0.03895283117890358, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.5.2.to_q.bias": { + "min": -0.13644249737262726, + "max": 0.10925862938165665, + "mean": 0.00023633803357370198, + "std": 0.029229167848825455, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.5.2.to_k.weight": { + "min": -0.3750911056995392, + "max": 0.4374293088912964, + "mean": -9.469786164117977e-06, + "std": 0.03928925842046738, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.5.2.to_k.bias": { + "min": -3.8464367389678955, + "max": 5.000250816345215, + "mean": 0.009745623916387558, + "std": 0.8453732132911682, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.5.2.to_v.weight": { + "min": -0.22324559092521667, + "max": 0.22006931900978088, + "mean": -2.64663412963273e-07, + "std": 0.03441375494003296, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.5.2.to_v.bias": { + "min": -0.04371564835309982, + "max": 0.03597109019756317, + "mean": -0.0002580236759968102, + "std": 0.012081029824912548, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.5.2.to_out.0.weight": { + "min": -0.21329627931118011, + "max": 0.1888744831085205, + "mean": -1.6700443666195497e-05, + "std": 0.03154045715928078, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.5.2.to_out.0.bias": { + "min": -0.1808258593082428, + "max": 0.12078980356454849, + "mean": -0.002406290266662836, + "std": 0.04127614200115204, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.5.3.g": { + "min": 0.42247915267944336, + "max": 0.9420861601829529, + "mean": 0.6627910733222961, + "std": 0.0568135567009449, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.5.4.ff.0.0.weight": { + "min": -0.3714267611503601, + "max": 0.47587329149246216, + "mean": -8.246101788245142e-05, + "std": 0.04089611768722534, + "sparsity": 0.0, + "shape": [ + 4096, + 1024 + ] + }, + "transformer.layers.5.4.ff.0.0.bias": { + "min": -0.208319753408432, + "max": 0.02722310833632946, + "mean": -0.03024582751095295, + "std": 0.021349623799324036, + "sparsity": 0.0, + "shape": [ + 4096 + ] + }, + "transformer.layers.5.4.ff.2.weight": { + "min": -0.34010598063468933, + "max": 0.7335456013679504, + "mean": 8.291324775200337e-05, + "std": 0.03477157652378082, + "sparsity": 0.0, + "shape": [ + 1024, + 4096 + ] + }, + "transformer.layers.5.4.ff.2.bias": { + "min": -0.2402523010969162, + "max": 0.050502024590969086, + "mean": -0.0011936500668525696, + "std": 0.020464643836021423, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.6.1.g": { + "min": 0.3060189485549927, + "max": 0.6537417769432068, + "mean": 0.5251810550689697, + "std": 0.046129435300827026, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.6.2.to_q.weight": { + "min": -0.3043527901172638, + "max": 0.2173452079296112, + "mean": 6.987799861235544e-05, + "std": 0.03949924185872078, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.6.2.to_q.bias": { + "min": -0.1495305597782135, + "max": 0.13139042258262634, + "mean": 0.0003452928503975272, + "std": 0.03046758659183979, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.6.2.to_k.weight": { + "min": -0.25741448998451233, + "max": 0.2021329253911972, + "mean": 3.105932046310045e-05, + "std": 0.039488501846790314, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.6.2.to_k.bias": { + "min": -2.336733102798462, + "max": 2.376356840133667, + "mean": -0.026247980073094368, + "std": 0.44985267519950867, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.6.2.to_v.weight": { + "min": -0.18904413282871246, + "max": 0.2104651778936386, + "mean": 3.720704626175575e-05, + "std": 0.03479856252670288, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.6.2.to_v.bias": { + "min": -0.03166992589831352, + "max": 0.035564228892326355, + "mean": -0.00020107123418711126, + "std": 0.012294227257370949, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.6.2.to_out.0.weight": { + "min": -0.18845464289188385, + "max": 0.17046742141246796, + "mean": -6.800049595767632e-05, + "std": 0.03217524290084839, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.6.2.to_out.0.bias": { + "min": -0.13940171897411346, + "max": 0.13724905252456665, + "mean": -0.002515769563615322, + "std": 0.05131084844470024, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.6.3.g": { + "min": 0.4671289920806885, + "max": 0.9564934968948364, + "mean": 0.6689913272857666, + "std": 0.05279172211885452, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.6.4.ff.0.0.weight": { + "min": -0.3243524730205536, + "max": 0.30971962213516235, + "mean": -1.389088538417127e-06, + "std": 0.04095206782221794, + "sparsity": 0.0, + "shape": [ + 4096, + 1024 + ] + }, + "transformer.layers.6.4.ff.0.0.bias": { + "min": -0.12475074827671051, + "max": 0.02534548193216324, + "mean": -0.03070956841111183, + "std": 0.019817529246211052, + "sparsity": 0.0, + "shape": [ + 4096 + ] + }, + "transformer.layers.6.4.ff.2.weight": { + "min": -0.44013386964797974, + "max": 0.44524946808815, + "mean": 9.531535761198029e-05, + "std": 0.03512435778975487, + "sparsity": 0.0, + "shape": [ + 1024, + 4096 + ] + }, + "transformer.layers.6.4.ff.2.bias": { + "min": -0.22465433180332184, + "max": 0.05168891325592995, + "mean": -0.0011842836393043399, + "std": 0.018476232886314392, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.7.1.g": { + "min": 0.3392145037651062, + "max": 0.739431619644165, + "mean": 0.5587528944015503, + "std": 0.04140577092766762, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.7.2.to_q.weight": { + "min": -0.2725517153739929, + "max": 0.2784435749053955, + "mean": 1.987360155908391e-05, + "std": 0.04106256738305092, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.7.2.to_q.bias": { + "min": -0.13695892691612244, + "max": 0.13984902203083038, + "mean": 0.00048777679330669343, + "std": 0.026632118970155716, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.7.2.to_k.weight": { + "min": -0.4907291829586029, + "max": 0.35599952936172485, + "mean": 8.879909000825137e-05, + "std": 0.0407005213201046, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.7.2.to_k.bias": { + "min": -2.2975404262542725, + "max": 1.7454535961151123, + "mean": -0.02108157053589821, + "std": 0.5002167820930481, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.7.2.to_v.weight": { + "min": -0.2176651507616043, + "max": 0.19791799783706665, + "mean": -4.056983016198501e-05, + "std": 0.03423743695020676, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.7.2.to_v.bias": { + "min": -0.04131868854165077, + "max": 0.038581475615501404, + "mean": -0.00014208082575351, + "std": 0.012879491783678532, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.7.2.to_out.0.weight": { + "min": -0.17750245332717896, + "max": 0.18368542194366455, + "mean": 4.755006739287637e-05, + "std": 0.031560346484184265, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.7.2.to_out.0.bias": { + "min": -0.17995940148830414, + "max": 0.18388336896896362, + "mean": -0.0022164953406900167, + "std": 0.05484570935368538, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.7.3.g": { + "min": 0.4742797613143921, + "max": 1.0257062911987305, + "mean": 0.6453534960746765, + "std": 0.05035950988531113, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.7.4.ff.0.0.weight": { + "min": -0.27185168862342834, + "max": 0.3093569278717041, + "mean": 0.00011239617015235126, + "std": 0.04068810120224953, + "sparsity": 0.0, + "shape": [ + 4096, + 1024 + ] + }, + "transformer.layers.7.4.ff.0.0.bias": { + "min": -0.10582997649908066, + "max": 0.02683391235768795, + "mean": -0.029520545154809952, + "std": 0.01793094538152218, + "sparsity": 0.0, + "shape": [ + 4096 + ] + }, + "transformer.layers.7.4.ff.2.weight": { + "min": -0.3390536606311798, + "max": 0.32923397421836853, + "mean": 5.560236604651436e-05, + "std": 0.03441813215613365, + "sparsity": 0.0, + "shape": [ + 1024, + 4096 + ] + }, + "transformer.layers.7.4.ff.2.bias": { + "min": -0.181716188788414, + "max": 0.04217486456036568, + "mean": -0.0010700200218707323, + "std": 0.017213836312294006, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.8.1.g": { + "min": 0.32544824481010437, + "max": 0.6866950988769531, + "mean": 0.511271595954895, + "std": 0.036954350769519806, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.8.2.to_q.weight": { + "min": -0.23384520411491394, + "max": 0.22571122646331787, + "mean": -3.601049320423044e-05, + "std": 0.0391816720366478, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.8.2.to_q.bias": { + "min": -0.1153523325920105, + "max": 0.1316574662923813, + "mean": 0.000150712497998029, + "std": 0.029186168685555458, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.8.2.to_k.weight": { + "min": -0.35289716720581055, + "max": 0.285473108291626, + "mean": 7.233719770738389e-06, + "std": 0.03925013542175293, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.8.2.to_k.bias": { + "min": -4.133274078369141, + "max": 3.544353723526001, + "mean": -0.011593173258006573, + "std": 0.6827409267425537, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.8.2.to_v.weight": { + "min": -0.21133771538734436, + "max": 0.20911119878292084, + "mean": 3.477419522823766e-05, + "std": 0.034489333629608154, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.8.2.to_v.bias": { + "min": -0.03563081845641136, + "max": 0.04807223752140999, + "mean": 0.0007964536780491471, + "std": 0.012856329791247845, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.8.2.to_out.0.weight": { + "min": -0.21064519882202148, + "max": 0.19317731261253357, + "mean": -1.2986236015422037e-06, + "std": 0.03169986233115196, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.8.2.to_out.0.bias": { + "min": -0.1866597682237625, + "max": 0.17717307806015015, + "mean": -0.002846275921911001, + "std": 0.05864023044705391, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.8.3.g": { + "min": 0.47464174032211304, + "max": 1.0418421030044556, + "mean": 0.6514742970466614, + "std": 0.049661051481962204, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.8.4.ff.0.0.weight": { + "min": -0.2484884411096573, + "max": 0.3291080594062805, + "mean": 0.00018062048184219748, + "std": 0.040576666593551636, + "sparsity": 0.0, + "shape": [ + 4096, + 1024 + ] + }, + "transformer.layers.8.4.ff.0.0.bias": { + "min": -0.12466001510620117, + "max": 0.024652821943163872, + "mean": -0.030505184084177017, + "std": 0.01760147698223591, + "sparsity": 0.0, + "shape": [ + 4096 + ] + }, + "transformer.layers.8.4.ff.2.weight": { + "min": -0.42117249965667725, + "max": 0.48183169960975647, + "mean": 4.90086677018553e-07, + "std": 0.03540300950407982, + "sparsity": 0.0, + "shape": [ + 1024, + 4096 + ] + }, + "transformer.layers.8.4.ff.2.bias": { + "min": -0.15187376737594604, + "max": 0.04340476170182228, + "mean": 4.305229231249541e-05, + "std": 0.014882412739098072, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.9.1.g": { + "min": 0.31561803817749023, + "max": 0.6820628046989441, + "mean": 0.5529670715332031, + "std": 0.04071620851755142, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.9.2.to_q.weight": { + "min": -0.20640292763710022, + "max": 0.2199181616306305, + "mean": 3.100156754953787e-05, + "std": 0.03830336779356003, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.9.2.to_q.bias": { + "min": -0.13785934448242188, + "max": 0.11272227019071579, + "mean": 2.0263127225916833e-05, + "std": 0.02582014910876751, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.9.2.to_k.weight": { + "min": -0.4027767777442932, + "max": 0.37112095952033997, + "mean": 2.6220748623018153e-05, + "std": 0.038185179233551025, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.9.2.to_k.bias": { + "min": -3.7714638710021973, + "max": 2.8691656589508057, + "mean": 0.0011573480442166328, + "std": 0.5169197916984558, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.9.2.to_v.weight": { + "min": -0.20294718444347382, + "max": 0.1975032389163971, + "mean": 2.9508448278647847e-05, + "std": 0.03430049493908882, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.9.2.to_v.bias": { + "min": -0.050956204533576965, + "max": 0.04001324996352196, + "mean": -0.0004197848029434681, + "std": 0.013423827476799488, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.9.2.to_out.0.weight": { + "min": -0.1965385526418686, + "max": 0.20179617404937744, + "mean": -1.230049292644253e-05, + "std": 0.03180824965238571, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.9.2.to_out.0.bias": { + "min": -0.1932075023651123, + "max": 0.19514988362789154, + "mean": -0.002968719694763422, + "std": 0.06257235258817673, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.9.3.g": { + "min": 0.3494449555873871, + "max": 1.084139108657837, + "mean": 0.6672452688217163, + "std": 0.055235255509614944, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.9.4.ff.0.0.weight": { + "min": -0.22517867386341095, + "max": 0.2515127956867218, + "mean": 0.0003590761625673622, + "std": 0.04076584428548813, + "sparsity": 0.0, + "shape": [ + 4096, + 1024 + ] + }, + "transformer.layers.9.4.ff.0.0.bias": { + "min": -0.09105702489614487, + "max": 0.043770160526037216, + "mean": -0.030091021209955215, + "std": 0.0176088884472847, + "sparsity": 0.0, + "shape": [ + 4096 + ] + }, + "transformer.layers.9.4.ff.2.weight": { + "min": -0.3535248339176178, + "max": 0.30410754680633545, + "mean": -4.392282062326558e-05, + "std": 0.03712813928723335, + "sparsity": 0.0, + "shape": [ + 1024, + 4096 + ] + }, + "transformer.layers.9.4.ff.2.bias": { + "min": -0.16202455759048462, + "max": 0.06354078650474548, + "mean": -8.128902118187398e-05, + "std": 0.01940615102648735, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.10.1.g": { + "min": 0.34876754879951477, + "max": 0.7220309376716614, + "mean": 0.5424379706382751, + "std": 0.039069268852472305, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.10.2.to_q.weight": { + "min": -0.2193686068058014, + "max": 0.22314214706420898, + "mean": -1.1116904715890996e-05, + "std": 0.03923606500029564, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.10.2.to_q.bias": { + "min": -0.11840695887804031, + "max": 0.1707676649093628, + "mean": 0.00028346438193693757, + "std": 0.025122247636318207, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.10.2.to_k.weight": { + "min": -0.24684838950634003, + "max": 0.3010847866535187, + "mean": -3.651722363429144e-05, + "std": 0.038935575634241104, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.10.2.to_k.bias": { + "min": -3.5055902004241943, + "max": 3.715036153793335, + "mean": 0.01585192233324051, + "std": 0.7825286984443665, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.10.2.to_v.weight": { + "min": -0.21871182322502136, + "max": 0.2376304566860199, + "mean": -1.361081376671791e-05, + "std": 0.03630790859460831, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.10.2.to_v.bias": { + "min": -0.04719124361872673, + "max": 0.05140624940395355, + "mean": 0.00048010991304181516, + "std": 0.013516944833099842, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.10.2.to_out.0.weight": { + "min": -0.21404245495796204, + "max": 0.21762129664421082, + "mean": 5.64762121939566e-05, + "std": 0.03361983224749565, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.10.2.to_out.0.bias": { + "min": -0.2114625871181488, + "max": 0.231521874666214, + "mean": -0.005106819327920675, + "std": 0.06188430264592171, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.10.3.g": { + "min": 0.36219048500061035, + "max": 1.1013058423995972, + "mean": 0.6993670463562012, + "std": 0.053603965789079666, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.10.4.ff.0.0.weight": { + "min": -0.23459365963935852, + "max": 0.2449057400226593, + "mean": 0.00046347593888640404, + "std": 0.04127476364374161, + "sparsity": 0.0, + "shape": [ + 4096, + 1024 + ] + }, + "transformer.layers.10.4.ff.0.0.bias": { + "min": -0.09808015823364258, + "max": 0.06838114559650421, + "mean": -0.03143930807709694, + "std": 0.01812371425330639, + "sparsity": 0.0, + "shape": [ + 4096 + ] + }, + "transformer.layers.10.4.ff.2.weight": { + "min": -0.30170318484306335, + "max": 0.3515554368495941, + "mean": -8.153638191288337e-05, + "std": 0.040280573070049286, + "sparsity": 0.0, + "shape": [ + 1024, + 4096 + ] + }, + "transformer.layers.10.4.ff.2.bias": { + "min": -0.15233194828033447, + "max": 0.14967864751815796, + "mean": 0.00025540069327689707, + "std": 0.023036718368530273, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.11.1.g": { + "min": 0.99940425157547, + "max": 1.0017729997634888, + "mean": 1.0002546310424805, + "std": 0.0006659556529484689, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.11.2.to_q.weight": { + "min": -0.03126639127731323, + "max": 0.03126263990998268, + "mean": -1.9294351659482345e-05, + "std": 0.018044061958789825, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.11.2.to_q.bias": { + "min": -0.031232889741659164, + "max": 0.03099249303340912, + "mean": -0.001084338640794158, + "std": 0.017953665927052498, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.11.2.to_k.weight": { + "min": -0.031263306736946106, + "max": 0.031267084181308746, + "mean": 3.548895620042458e-06, + "std": 0.018044468015432358, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.11.2.to_k.bias": { + "min": -0.03115880861878395, + "max": 0.031179169192910194, + "mean": 0.0003339822869747877, + "std": 0.018065886572003365, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.11.2.to_v.weight": { + "min": 0.0, + "max": 0.0, + "mean": 0.0, + "std": 0.0, + "sparsity": 1.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.11.2.to_v.bias": { + "min": 0.0, + "max": 0.0, + "mean": 0.0, + "std": 0.0, + "sparsity": 1.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.11.2.to_out.0.weight": { + "min": 0.0, + "max": 0.0, + "mean": 0.0, + "std": 0.0, + "sparsity": 1.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.11.2.to_out.0.bias": { + "min": -0.00013742789451498538, + "max": 0.00015863632143009454, + "mean": 2.736554449711548e-07, + "std": 4.781073585036211e-05, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.11.3.g": { + "min": 0.9996252655982971, + "max": 1.0021158456802368, + "mean": 1.0004429817199707, + "std": 0.0006555348518304527, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.11.4.ff.0.0.weight": { + "min": -0.03161333501338959, + "max": 0.031580716371536255, + "mean": -9.014614079205785e-06, + "std": 0.018046868965029716, + "sparsity": 0.0, + "shape": [ + 4096, + 1024 + ] + }, + "transformer.layers.11.4.ff.0.0.bias": { + "min": -0.031167982146143913, + "max": 0.03145414963364601, + "mean": 0.0002899511018767953, + "std": 0.01800374686717987, + "sparsity": 0.0, + "shape": [ + 4096 + ] + }, + "transformer.layers.11.4.ff.2.weight": { + "min": -0.00018904745229519904, + "max": 0.00019723534933291376, + "mean": 1.0521711502065045e-08, + "std": 3.849043423542753e-05, + "sparsity": 0.0, + "shape": [ + 1024, + 4096 + ] + }, + "transformer.layers.11.4.ff.2.bias": { + "min": -0.00014144052693154663, + "max": 0.00015886471373960376, + "mean": 2.7657870305120014e-07, + "std": 4.894055746262893e-05, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.12.1.g": { + "min": 0.38299599289894104, + "max": 0.7195751070976257, + "mean": 0.5807684659957886, + "std": 0.03886786475777626, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.12.2.to_q.weight": { + "min": -0.23805734515190125, + "max": 0.19658388197422028, + "mean": 2.6588520995574072e-05, + "std": 0.037470221519470215, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.12.2.to_q.bias": { + "min": -0.11865263432264328, + "max": 0.16607660055160522, + "mean": 0.0009905615588650107, + "std": 0.027556024491786957, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.12.2.to_k.weight": { + "min": -0.24617764353752136, + "max": 0.5007338523864746, + "mean": -5.0468875997466967e-05, + "std": 0.03762808069586754, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.12.2.to_k.bias": { + "min": -3.9424328804016113, + "max": 3.7695746421813965, + "mean": -0.003572134766727686, + "std": 0.681464433670044, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.12.2.to_v.weight": { + "min": -0.22736115753650665, + "max": 0.2514519989490509, + "mean": -1.1535179510246962e-05, + "std": 0.037439387291669846, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.12.2.to_v.bias": { + "min": -0.07172132283449173, + "max": 0.08075973391532898, + "mean": -0.0005193240358494222, + "std": 0.0156661756336689, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.12.2.to_out.0.weight": { + "min": -0.2282123565673828, + "max": 0.25804591178894043, + "mean": -2.8565638785948977e-05, + "std": 0.03542618080973625, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.12.2.to_out.0.bias": { + "min": -0.20044255256652832, + "max": 0.21519678831100464, + "mean": -0.005535616539418697, + "std": 0.06834741681814194, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.12.3.g": { + "min": 0.40515244007110596, + "max": 1.1894633769989014, + "mean": 0.7380411624908447, + "std": 0.055237166583538055, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.12.4.ff.0.0.weight": { + "min": -0.221146821975708, + "max": 0.24604949355125427, + "mean": 0.0005211484967730939, + "std": 0.041342463344335556, + "sparsity": 0.0, + "shape": [ + 4096, + 1024 + ] + }, + "transformer.layers.12.4.ff.0.0.bias": { + "min": -0.10338832437992096, + "max": 0.02417122572660446, + "mean": -0.03267121687531471, + "std": 0.018886109814047813, + "sparsity": 0.0, + "shape": [ + 4096 + ] + }, + "transformer.layers.12.4.ff.2.weight": { + "min": -0.4494054913520813, + "max": 0.4224247634410858, + "mean": -0.0004330066149123013, + "std": 0.046903740614652634, + "sparsity": 0.0, + "shape": [ + 1024, + 4096 + ] + }, + "transformer.layers.12.4.ff.2.bias": { + "min": -0.2513982057571411, + "max": 0.47010472416877747, + "mean": 0.003200565231963992, + "std": 0.04454652965068817, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.13.0.weight": { + "min": -0.3171570301055908, + "max": 0.33336329460144043, + "mean": -2.526402022340335e-05, + "std": 0.021290859207510948, + "sparsity": 0.0, + "shape": [ + 1024, + 2048 + ] + }, + "transformer.layers.13.1.g": { + "min": 0.3245790898799896, + "max": 0.6854778528213501, + "mean": 0.5710608959197998, + "std": 0.04472013935446739, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.13.2.to_q.weight": { + "min": -0.16466441750526428, + "max": 0.1739748865365982, + "mean": -4.8596641136100516e-05, + "std": 0.03318468853831291, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.13.2.to_q.bias": { + "min": -0.18683482706546783, + "max": 0.14287494122982025, + "mean": 3.6249548429623246e-05, + "std": 0.029692435637116432, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.13.2.to_k.weight": { + "min": -0.38059577345848083, + "max": 0.24607740342617035, + "mean": -9.968647646019235e-06, + "std": 0.03276587277650833, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.13.2.to_k.bias": { + "min": -3.65606689453125, + "max": 3.290353775024414, + "mean": -0.01425391435623169, + "std": 0.9852582812309265, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.13.2.to_v.weight": { + "min": -0.23509447276592255, + "max": 0.24749873578548431, + "mean": -1.7839809515862726e-05, + "std": 0.04170282557606697, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.13.2.to_v.bias": { + "min": -0.07275734841823578, + "max": 0.15453355014324188, + "mean": 0.0006638452177867293, + "std": 0.025170044973492622, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.13.2.to_out.0.weight": { + "min": -0.26656097173690796, + "max": 0.24857115745544434, + "mean": -1.5359542885562405e-05, + "std": 0.040143173187971115, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.13.2.to_out.0.bias": { + "min": -0.18948662281036377, + "max": 0.19466565549373627, + "mean": -0.0012274996843189, + "std": 0.06669430434703827, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.13.3.g": { + "min": 0.3292614817619324, + "max": 0.9995094537734985, + "mean": 0.7192604541778564, + "std": 0.05234057828783989, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.13.4.ff.0.0.weight": { + "min": -0.2315857857465744, + "max": 0.24574460089206696, + "mean": 0.00018271194130647928, + "std": 0.04090625420212746, + "sparsity": 0.0, + "shape": [ + 4096, + 1024 + ] + }, + "transformer.layers.13.4.ff.0.0.bias": { + "min": -0.11421883851289749, + "max": 0.018689358606934547, + "mean": -0.04248232766985893, + "std": 0.018854642286896706, + "sparsity": 0.0, + "shape": [ + 4096 + ] + }, + "transformer.layers.13.4.ff.2.weight": { + "min": -0.38993996381759644, + "max": 0.4073200523853302, + "mean": -2.1967953216517344e-05, + "std": 0.04854067787528038, + "sparsity": 0.0, + "shape": [ + 1024, + 4096 + ] + }, + "transformer.layers.13.4.ff.2.bias": { + "min": -0.6932199001312256, + "max": 0.4125868082046509, + "mean": 0.0008555519161745906, + "std": 0.06029324233531952, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.14.0.weight": { + "min": -0.0002173546963604167, + "max": 1.0001165866851807, + "mean": 0.0004882887005805969, + "std": 0.0220916960388422, + "sparsity": 0.0, + "shape": [ + 1024, + 2048 + ] + }, + "transformer.layers.14.1.g": { + "min": 0.9994292855262756, + "max": 1.0017839670181274, + "mean": 1.000253677368164, + "std": 0.000652652932330966, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.14.2.to_q.weight": { + "min": -0.03126111254096031, + "max": 0.0312650129199028, + "mean": -2.1023370209150016e-05, + "std": 0.0180354006588459, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.14.2.to_q.bias": { + "min": -0.031219881027936935, + "max": 0.031236713752150536, + "mean": -0.0006771213375031948, + "std": 0.017829909920692444, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.14.2.to_k.weight": { + "min": -0.03126417100429535, + "max": 0.03126959502696991, + "mean": -8.83279244590085e-06, + "std": 0.018034426495432854, + "sparsity": 9.5367431640625e-07, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.14.2.to_k.bias": { + "min": -0.03123662993311882, + "max": 0.03124932385981083, + "mean": -0.0007298794225789607, + "std": 0.01794484816491604, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.14.2.to_v.weight": { + "min": 0.0, + "max": 0.0, + "mean": 0.0, + "std": 0.0, + "sparsity": 1.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.14.2.to_v.bias": { + "min": 0.0, + "max": 0.0, + "mean": 0.0, + "std": 0.0, + "sparsity": 1.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.14.2.to_out.0.weight": { + "min": 0.0, + "max": 0.0, + "mean": 0.0, + "std": 0.0, + "sparsity": 1.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.14.2.to_out.0.bias": { + "min": -0.00017386232502758503, + "max": 0.00014760847261641175, + "mean": 3.442557272137492e-06, + "std": 5.325600432115607e-05, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.14.3.g": { + "min": 0.9995221495628357, + "max": 1.0020443201065063, + "mean": 1.0004539489746094, + "std": 0.000669351196847856, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.14.4.ff.0.0.weight": { + "min": -0.03147042542695999, + "max": 0.03158598765730858, + "mean": 5.1154065658920445e-06, + "std": 0.018045036122202873, + "sparsity": 0.0, + "shape": [ + 4096, + 1024 + ] + }, + "transformer.layers.14.4.ff.0.0.bias": { + "min": -0.03117763064801693, + "max": 0.031405530869960785, + "mean": 0.00032266404014080763, + "std": 0.0180798526853323, + "sparsity": 0.0, + "shape": [ + 4096 + ] + }, + "transformer.layers.14.4.ff.2.weight": { + "min": -0.00019398781296331435, + "max": 0.0002045449218712747, + "mean": 1.7092556845454965e-06, + "std": 3.9782767998985946e-05, + "sparsity": 0.0, + "shape": [ + 1024, + 4096 + ] + }, + "transformer.layers.14.4.ff.2.bias": { + "min": -0.00017830374417826533, + "max": 0.0001471550203859806, + "mean": 3.7268218875396997e-06, + "std": 5.360128852771595e-05, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.15.0.weight": { + "min": -0.23455342650413513, + "max": 0.27251818776130676, + "mean": 7.011342859186698e-06, + "std": 0.018812235444784164, + "sparsity": 0.0, + "shape": [ + 1024, + 2048 + ] + }, + "transformer.layers.15.1.g": { + "min": 0.3213299512863159, + "max": 0.6936513781547546, + "mean": 0.5816924571990967, + "std": 0.045936986804008484, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.15.2.to_q.weight": { + "min": -0.18183718621730804, + "max": 0.19770397245883942, + "mean": -1.1711626939359121e-05, + "std": 0.033187560737133026, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.15.2.to_q.bias": { + "min": -0.16069863736629486, + "max": 0.12950360774993896, + "mean": -0.001068056095391512, + "std": 0.03414401412010193, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.15.2.to_k.weight": { + "min": -0.33220145106315613, + "max": 0.31142792105674744, + "mean": -1.0354739060858265e-05, + "std": 0.03223816305398941, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.15.2.to_k.bias": { + "min": -7.803721904754639, + "max": 8.76359748840332, + "mean": 0.09347197413444519, + "std": 1.6197658777236938, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.15.2.to_v.weight": { + "min": -0.23378030955791473, + "max": 0.24203070998191833, + "mean": 4.133610491408035e-05, + "std": 0.0408620685338974, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.15.2.to_v.bias": { + "min": -0.07593037933111191, + "max": 0.06580135226249695, + "mean": 0.0004787116195075214, + "std": 0.019414879381656647, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.15.2.to_out.0.weight": { + "min": -0.24592415988445282, + "max": 0.2340637594461441, + "mean": -2.9871353035559878e-06, + "std": 0.03943677991628647, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.15.2.to_out.0.bias": { + "min": -0.1628992110490799, + "max": 0.16083794832229614, + "mean": 0.001633270876482129, + "std": 0.06527844816446304, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.15.3.g": { + "min": 0.5569714307785034, + "max": 0.9439458250999451, + "mean": 0.7129694819450378, + "std": 0.04013355076313019, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.15.4.ff.0.0.weight": { + "min": -0.2286839783191681, + "max": 0.2551024854183197, + "mean": -4.545085539575666e-05, + "std": 0.04058132320642471, + "sparsity": 0.0, + "shape": [ + 4096, + 1024 + ] + }, + "transformer.layers.15.4.ff.0.0.bias": { + "min": -0.13476935029029846, + "max": 0.02225329726934433, + "mean": -0.04135678708553314, + "std": 0.018384402617812157, + "sparsity": 0.0, + "shape": [ + 4096 + ] + }, + "transformer.layers.15.4.ff.2.weight": { + "min": -0.42168760299682617, + "max": 0.39237409830093384, + "mean": -4.401172191137448e-06, + "std": 0.04779110476374626, + "sparsity": 0.0, + "shape": [ + 1024, + 4096 + ] + }, + "transformer.layers.15.4.ff.2.bias": { + "min": -0.6073517799377441, + "max": 0.6513891220092773, + "mean": 0.0015880158171057701, + "std": 0.05683854594826698, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.16.0.weight": { + "min": -0.2518226206302643, + "max": 0.3207785189151764, + "mean": -6.094680884416448e-06, + "std": 0.019615668803453445, + "sparsity": 0.0, + "shape": [ + 1024, + 2048 + ] + }, + "transformer.layers.16.1.g": { + "min": 0.3598737120628357, + "max": 0.6824128031730652, + "mean": 0.5707628726959229, + "std": 0.0429723858833313, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.16.2.to_q.weight": { + "min": -0.22058245539665222, + "max": 0.1771002560853958, + "mean": -3.480628220131621e-05, + "std": 0.0343024767935276, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.16.2.to_q.bias": { + "min": -0.16346584260463715, + "max": 0.23297329246997833, + "mean": 0.000366326654329896, + "std": 0.03285832703113556, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.16.2.to_k.weight": { + "min": -0.2638060748577118, + "max": 0.23985332250595093, + "mean": -5.253252311376855e-05, + "std": 0.033901575952768326, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.16.2.to_k.bias": { + "min": -4.8552327156066895, + "max": 5.091460227966309, + "mean": 0.04388260096311569, + "std": 1.2293205261230469, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.16.2.to_v.weight": { + "min": -0.24656128883361816, + "max": 0.2505475580692291, + "mean": 7.217615348054096e-05, + "std": 0.043992768973112106, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.16.2.to_v.bias": { + "min": -0.0626230239868164, + "max": 0.054548561573028564, + "mean": 0.0006508217193186283, + "std": 0.017192188650369644, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.16.2.to_out.0.weight": { + "min": -0.2865524888038635, + "max": 0.2719300389289856, + "mean": -4.991707464796491e-05, + "std": 0.04299106448888779, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.16.2.to_out.0.bias": { + "min": -0.1607704609632492, + "max": 0.17038598656654358, + "mean": -0.0028860813472419977, + "std": 0.05928485840559006, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.16.3.g": { + "min": 0.5196844339370728, + "max": 0.9328820705413818, + "mean": 0.7135865688323975, + "std": 0.03841733559966087, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.16.4.ff.0.0.weight": { + "min": -0.23817408084869385, + "max": 0.2493610382080078, + "mean": 0.00046480720629915595, + "std": 0.04046126455068588, + "sparsity": 0.0, + "shape": [ + 4096, + 1024 + ] + }, + "transformer.layers.16.4.ff.0.0.bias": { + "min": -0.14443805813789368, + "max": 0.04147465527057648, + "mean": -0.03969287499785423, + "std": 0.020544789731502533, + "sparsity": 0.0, + "shape": [ + 4096 + ] + }, + "transformer.layers.16.4.ff.2.weight": { + "min": -0.5328277945518494, + "max": 0.5829682350158691, + "mean": 6.036185368429869e-06, + "std": 0.048868391662836075, + "sparsity": 0.0, + "shape": [ + 1024, + 4096 + ] + }, + "transformer.layers.16.4.ff.2.bias": { + "min": -0.5192180871963501, + "max": 0.49342840909957886, + "mean": 0.0023608917836099863, + "std": 0.05344958230853081, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.17.0.weight": { + "min": -0.2736181318759918, + "max": 0.31526556611061096, + "mean": 1.8652735889190808e-06, + "std": 0.020052799955010414, + "sparsity": 0.0, + "shape": [ + 1024, + 2048 + ] + }, + "transformer.layers.17.1.g": { + "min": 0.36623507738113403, + "max": 0.7115861177444458, + "mean": 0.5932326316833496, + "std": 0.045942164957523346, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.17.2.to_q.weight": { + "min": -0.21099260449409485, + "max": 0.19959695637226105, + "mean": 3.07829977828078e-05, + "std": 0.034868910908699036, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.17.2.to_q.bias": { + "min": -0.18723583221435547, + "max": 0.20388372242450714, + "mean": 0.000956192088779062, + "std": 0.031518690288066864, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.17.2.to_k.weight": { + "min": -0.28975075483322144, + "max": 0.3398789167404175, + "mean": -4.732892557512969e-05, + "std": 0.034589968621730804, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.17.2.to_k.bias": { + "min": -3.877439260482788, + "max": 3.3875346183776855, + "mean": 0.014458952471613884, + "std": 0.858471155166626, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.17.2.to_v.weight": { + "min": -0.22435642778873444, + "max": 0.249828040599823, + "mean": -4.0124336919689085e-06, + "std": 0.04223557561635971, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.17.2.to_v.bias": { + "min": -0.05512487143278122, + "max": 0.046701643615961075, + "mean": -1.9162820535711944e-05, + "std": 0.015846921131014824, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.17.2.to_out.0.weight": { + "min": -0.29301708936691284, + "max": 0.29095572233200073, + "mean": -7.334054771490628e-06, + "std": 0.04195055365562439, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.17.2.to_out.0.bias": { + "min": -0.12482845038175583, + "max": 0.25941941142082214, + "mean": -0.003237831173464656, + "std": 0.05315971001982689, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.17.3.g": { + "min": 0.4561736285686493, + "max": 0.8445789813995361, + "mean": 0.7056531310081482, + "std": 0.035228051245212555, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.17.4.ff.0.0.weight": { + "min": -0.5114014148712158, + "max": 0.348456472158432, + "mean": 0.00034256701474078, + "std": 0.04020610451698303, + "sparsity": 0.0, + "shape": [ + 4096, + 1024 + ] + }, + "transformer.layers.17.4.ff.0.0.bias": { + "min": -0.18698255717754364, + "max": 0.03949001431465149, + "mean": -0.03939007595181465, + "std": 0.0213507991284132, + "sparsity": 0.0, + "shape": [ + 4096 + ] + }, + "transformer.layers.17.4.ff.2.weight": { + "min": -0.544358491897583, + "max": 0.5564395785331726, + "mean": -7.145745621528476e-05, + "std": 0.05074309930205345, + "sparsity": 0.0, + "shape": [ + 1024, + 4096 + ] + }, + "transformer.layers.17.4.ff.2.bias": { + "min": -0.5117879509925842, + "max": 0.6644083857536316, + "mean": 0.002445152960717678, + "std": 0.04953145608305931, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.18.0.weight": { + "min": -0.33249062299728394, + "max": 0.2656247019767761, + "mean": 3.6327573980088346e-06, + "std": 0.019390461966395378, + "sparsity": 0.0, + "shape": [ + 1024, + 2048 + ] + }, + "transformer.layers.18.1.g": { + "min": 0.3221387565135956, + "max": 0.7663495540618896, + "mean": 0.651084840297699, + "std": 0.04530828446149826, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.18.2.to_q.weight": { + "min": -0.24955259263515472, + "max": 0.21952223777770996, + "mean": -2.4627406673971564e-06, + "std": 0.0365021638572216, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.18.2.to_q.bias": { + "min": -0.32713782787323, + "max": 0.2872367203235626, + "mean": -0.0006778471870347857, + "std": 0.03855384141206741, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.18.2.to_k.weight": { + "min": -0.31010347604751587, + "max": 0.36993831396102905, + "mean": 6.482718890765682e-05, + "std": 0.036242760717868805, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.18.2.to_k.bias": { + "min": -4.71769905090332, + "max": 5.807940483093262, + "mean": 0.03795948997139931, + "std": 1.4132622480392456, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.18.2.to_v.weight": { + "min": -0.2217160314321518, + "max": 0.20588469505310059, + "mean": -7.503203232772648e-05, + "std": 0.04249139502644539, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.18.2.to_v.bias": { + "min": -0.07754088938236237, + "max": 0.051487792283296585, + "mean": -0.0009253682801499963, + "std": 0.016408486291766167, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.18.2.to_out.0.weight": { + "min": -0.3308248519897461, + "max": 0.32916712760925293, + "mean": -4.993749826098792e-06, + "std": 0.042798057198524475, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.18.2.to_out.0.bias": { + "min": -0.2850324213504791, + "max": 0.1117776408791542, + "mean": -0.0012074043042957783, + "std": 0.047010280191898346, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.18.3.g": { + "min": 0.4863123297691345, + "max": 0.8869433403015137, + "mean": 0.7375507354736328, + "std": 0.03823651745915413, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.18.4.ff.0.0.weight": { + "min": -0.36125612258911133, + "max": 0.27433156967163086, + "mean": 5.119972047396004e-05, + "std": 0.04065272584557533, + "sparsity": 0.0, + "shape": [ + 4096, + 1024 + ] + }, + "transformer.layers.18.4.ff.0.0.bias": { + "min": -0.2477303296327591, + "max": 0.04647788032889366, + "mean": -0.03926857188344002, + "std": 0.023257533088326454, + "sparsity": 0.0, + "shape": [ + 4096 + ] + }, + "transformer.layers.18.4.ff.2.weight": { + "min": -0.6263415217399597, + "max": 0.5970607399940491, + "mean": -6.0351769207045436e-05, + "std": 0.05312627553939819, + "sparsity": 0.0, + "shape": [ + 1024, + 4096 + ] + }, + "transformer.layers.18.4.ff.2.bias": { + "min": -0.709812343120575, + "max": 0.2658604085445404, + "mean": 0.0009171634446829557, + "std": 0.051236364990472794, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.19.0.weight": { + "min": -0.3433721363544464, + "max": 0.30349576473236084, + "mean": 1.867878154371283e-07, + "std": 0.019139809533953667, + "sparsity": 0.0, + "shape": [ + 1024, + 2048 + ] + }, + "transformer.layers.19.1.g": { + "min": 0.34990525245666504, + "max": 0.7829033136367798, + "mean": 0.6388983726501465, + "std": 0.04923005402088165, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.19.2.to_q.weight": { + "min": -0.20573130249977112, + "max": 0.2069031298160553, + "mean": -5.999910717946477e-05, + "std": 0.037698354572057724, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.19.2.to_q.bias": { + "min": -0.25860944390296936, + "max": 0.2683144211769104, + "mean": -0.00040654174517840147, + "std": 0.04462500661611557, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.19.2.to_k.weight": { + "min": -0.3541562557220459, + "max": 0.3225262761116028, + "mean": -7.357165486610029e-06, + "std": 0.03720669820904732, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.19.2.to_k.bias": { + "min": -5.261901378631592, + "max": 4.204929351806641, + "mean": -0.026422729715704918, + "std": 1.0068349838256836, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.19.2.to_v.weight": { + "min": -0.23875762522220612, + "max": 0.24374397099018097, + "mean": -2.557489278842695e-05, + "std": 0.04321581870317459, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.19.2.to_v.bias": { + "min": -0.06234561279416084, + "max": 0.05673680081963539, + "mean": 0.00034723637509159744, + "std": 0.01415068656206131, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.19.2.to_out.0.weight": { + "min": -0.4374503195285797, + "max": 0.37361523509025574, + "mean": 1.4507659216178581e-05, + "std": 0.044127773493528366, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.19.2.to_out.0.bias": { + "min": -0.09634225070476532, + "max": 0.17621064186096191, + "mean": -0.0006586947711184621, + "std": 0.035146258771419525, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.19.3.g": { + "min": 0.421725332736969, + "max": 1.0694254636764526, + "mean": 0.7485451698303223, + "std": 0.04206714406609535, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.19.4.ff.0.0.weight": { + "min": -0.2659734785556793, + "max": 0.2969002425670624, + "mean": -7.885815284680575e-05, + "std": 0.04081321880221367, + "sparsity": 0.0, + "shape": [ + 4096, + 1024 + ] + }, + "transformer.layers.19.4.ff.0.0.bias": { + "min": -0.18494504690170288, + "max": 0.043268244713544846, + "mean": -0.03681334853172302, + "std": 0.025581398978829384, + "sparsity": 0.0, + "shape": [ + 4096 + ] + }, + "transformer.layers.19.4.ff.2.weight": { + "min": -0.4577294886112213, + "max": 0.4868638217449188, + "mean": 4.411918780533597e-05, + "std": 0.054221056401729584, + "sparsity": 0.0, + "shape": [ + 1024, + 4096 + ] + }, + "transformer.layers.19.4.ff.2.bias": { + "min": -0.286346971988678, + "max": 0.5518361330032349, + "mean": -0.0008815097389742732, + "std": 0.04783621430397034, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.20.0.weight": { + "min": -0.29267972707748413, + "max": 0.3227570652961731, + "mean": 6.020641194481868e-06, + "std": 0.019972950220108032, + "sparsity": 0.0, + "shape": [ + 1024, + 2048 + ] + }, + "transformer.layers.20.1.g": { + "min": 0.2912514805793762, + "max": 0.7601991891860962, + "mean": 0.6508588194847107, + "std": 0.05212089791893959, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.20.2.to_q.weight": { + "min": -0.2437000423669815, + "max": 0.26162612438201904, + "mean": -5.554972631216515e-06, + "std": 0.039614368230104446, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.20.2.to_q.bias": { + "min": -0.2675025463104248, + "max": 0.20013028383255005, + "mean": -0.0008774266461841762, + "std": 0.05176888778805733, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.20.2.to_k.weight": { + "min": -0.27221566438674927, + "max": 0.25374382734298706, + "mean": 5.006398168916348e-06, + "std": 0.03871097415685654, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.20.2.to_k.bias": { + "min": -12.966026306152344, + "max": 15.947824478149414, + "mean": 0.03323008120059967, + "std": 1.989342451095581, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.20.2.to_v.weight": { + "min": -0.20656642317771912, + "max": 0.22588562965393066, + "mean": -7.24760175216943e-05, + "std": 0.040559086948633194, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.20.2.to_v.bias": { + "min": -0.06937043368816376, + "max": 0.06317680329084396, + "mean": 0.000156470196088776, + "std": 0.014745255932211876, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.20.2.to_out.0.weight": { + "min": -0.46550098061561584, + "max": 0.32025203108787537, + "mean": 1.966371200978756e-05, + "std": 0.04059458151459694, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.20.2.to_out.0.bias": { + "min": -0.06405901163816452, + "max": 0.11548515409231186, + "mean": 0.0011954698711633682, + "std": 0.024709828197956085, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.20.3.g": { + "min": 0.37493425607681274, + "max": 0.9319035410881042, + "mean": 0.7510924339294434, + "std": 0.0401909314095974, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.20.4.ff.0.0.weight": { + "min": -0.27919864654541016, + "max": 0.273176908493042, + "mean": -0.0001684028684394434, + "std": 0.041004277765750885, + "sparsity": 0.0, + "shape": [ + 4096, + 1024 + ] + }, + "transformer.layers.20.4.ff.0.0.bias": { + "min": -0.19848693907260895, + "max": 0.05126062035560608, + "mean": -0.032024383544921875, + "std": 0.025078732520341873, + "sparsity": 0.0, + "shape": [ + 4096 + ] + }, + "transformer.layers.20.4.ff.2.weight": { + "min": -0.6584433317184448, + "max": 0.5357221961021423, + "mean": -4.880438791587949e-05, + "std": 0.05285734310746193, + "sparsity": 0.0, + "shape": [ + 1024, + 4096 + ] + }, + "transformer.layers.20.4.ff.2.bias": { + "min": -0.19274669885635376, + "max": 0.5823217630386353, + "mean": -0.0005133696831762791, + "std": 0.041087545454502106, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.21.0.weight": { + "min": -0.4175601005554199, + "max": 0.37188875675201416, + "mean": 6.479064722952899e-06, + "std": 0.021628154441714287, + "sparsity": 0.0, + "shape": [ + 1024, + 2048 + ] + }, + "transformer.layers.21.1.g": { + "min": 0.2145100235939026, + "max": 0.7467755675315857, + "mean": 0.6495225429534912, + "std": 0.054342612624168396, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.21.2.to_q.weight": { + "min": -0.20954373478889465, + "max": 0.19555190205574036, + "mean": 4.0139111661119387e-05, + "std": 0.03946155682206154, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.21.2.to_q.bias": { + "min": -0.32948848605155945, + "max": 0.2595402002334595, + "mean": -0.0032335962168872356, + "std": 0.05627242103219032, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.21.2.to_k.weight": { + "min": -0.2058991640806198, + "max": 0.2547155022621155, + "mean": 5.40805995115079e-05, + "std": 0.03856402263045311, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.21.2.to_k.bias": { + "min": -6.243993759155273, + "max": 6.932845115661621, + "mean": 0.048340216279029846, + "std": 1.385199785232544, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.21.2.to_v.weight": { + "min": -0.20978908240795135, + "max": 0.23056426644325256, + "mean": -4.742521468870109e-06, + "std": 0.04131828248500824, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.21.2.to_v.bias": { + "min": -0.04378769174218178, + "max": 0.0359850712120533, + "mean": -6.261238013394177e-06, + "std": 0.012797025963664055, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.21.2.to_out.0.weight": { + "min": -0.39764100313186646, + "max": 0.34504374861717224, + "mean": -5.53192148800008e-05, + "std": 0.0423952080309391, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.21.2.to_out.0.bias": { + "min": -0.05508939549326897, + "max": 0.06280933320522308, + "mean": 0.0003585501981433481, + "std": 0.018675601109862328, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.21.3.g": { + "min": 0.3507746756076813, + "max": 1.0452601909637451, + "mean": 0.7896535992622375, + "std": 0.04874108359217644, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.21.4.ff.0.0.weight": { + "min": -0.3336845338344574, + "max": 0.38642778992652893, + "mean": -0.00016908602265175432, + "std": 0.041490186005830765, + "sparsity": 0.0, + "shape": [ + 4096, + 1024 + ] + }, + "transformer.layers.21.4.ff.0.0.bias": { + "min": -0.1574612259864807, + "max": 0.05922037363052368, + "mean": -0.03182276338338852, + "std": 0.025103161111474037, + "sparsity": 0.0, + "shape": [ + 4096 + ] + }, + "transformer.layers.21.4.ff.2.weight": { + "min": -0.6963140964508057, + "max": 0.46921107172966003, + "mean": -8.656673162477091e-05, + "std": 0.05180606618523598, + "sparsity": 0.0, + "shape": [ + 1024, + 4096 + ] + }, + "transformer.layers.21.4.ff.2.bias": { + "min": -0.24794527888298035, + "max": 0.3287939429283142, + "mean": -0.00025959889171645045, + "std": 0.04145469143986702, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.22.0.weight": { + "min": -0.28705933690071106, + "max": 0.3503926694393158, + "mean": -2.8700230814138195e-06, + "std": 0.024241898208856583, + "sparsity": 0.0, + "shape": [ + 1024, + 2048 + ] + }, + "transformer.layers.22.1.g": { + "min": 0.19675415754318237, + "max": 0.7791337370872498, + "mean": 0.6702517867088318, + "std": 0.05866968631744385, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.22.2.to_q.weight": { + "min": -0.22908443212509155, + "max": 0.2313445806503296, + "mean": -2.062591738649644e-05, + "std": 0.040440406650304794, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.22.2.to_q.bias": { + "min": -0.22002485394477844, + "max": 0.24098847806453705, + "mean": 0.00078444869723171, + "std": 0.0558483712375164, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.22.2.to_k.weight": { + "min": -0.21667493879795074, + "max": 0.22645404934883118, + "mean": -7.211311458377168e-05, + "std": 0.03937484323978424, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.22.2.to_k.bias": { + "min": -8.906242370605469, + "max": 9.069114685058594, + "mean": -0.0012534279376268387, + "std": 1.8484383821487427, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.22.2.to_v.weight": { + "min": -0.2695206105709076, + "max": 0.2589607834815979, + "mean": 4.368612644611858e-05, + "std": 0.03841120004653931, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.22.2.to_v.bias": { + "min": -0.05792244151234627, + "max": 0.05800376832485199, + "mean": 0.0003531992551870644, + "std": 0.014716269448399544, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.22.2.to_out.0.weight": { + "min": -0.2641335129737854, + "max": 0.2883334755897522, + "mean": -6.170988490339369e-05, + "std": 0.03907797113060951, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.22.2.to_out.0.bias": { + "min": -0.043938618153333664, + "max": 0.037385016679763794, + "mean": -9.84332655207254e-05, + "std": 0.013347743079066277, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.22.3.g": { + "min": 0.3393842577934265, + "max": 1.0925544500350952, + "mean": 0.8639589548110962, + "std": 0.0638754740357399, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.22.4.ff.0.0.weight": { + "min": -0.4231780469417572, + "max": 0.41907352209091187, + "mean": 0.0003135594888590276, + "std": 0.04351302981376648, + "sparsity": 0.0, + "shape": [ + 4096, + 1024 + ] + }, + "transformer.layers.22.4.ff.0.0.bias": { + "min": -0.21478679776191711, + "max": 0.1706700474023819, + "mean": -0.02944377437233925, + "std": 0.03187936916947365, + "sparsity": 0.0, + "shape": [ + 4096 + ] + }, + "transformer.layers.22.4.ff.2.weight": { + "min": -0.5987504720687866, + "max": 0.5598719120025635, + "mean": -0.00014867217396385968, + "std": 0.05346066504716873, + "sparsity": 0.0, + "shape": [ + 1024, + 4096 + ] + }, + "transformer.layers.22.4.ff.2.bias": { + "min": -0.17880699038505554, + "max": 0.37724727392196655, + "mean": 0.0013524596579372883, + "std": 0.037310197949409485, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.23.0.weight": { + "min": -0.39442750811576843, + "max": 0.3689110279083252, + "mean": 3.764010398299433e-05, + "std": 0.028617940843105316, + "sparsity": 0.0, + "shape": [ + 1024, + 2048 + ] + }, + "transformer.layers.23.1.g": { + "min": 0.29055094718933105, + "max": 0.8275657296180725, + "mean": 0.7055599689483643, + "std": 0.06785259395837784, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.23.2.to_q.weight": { + "min": -0.9265406131744385, + "max": 1.0269172191619873, + "mean": -2.7786163627752103e-05, + "std": 0.04764207825064659, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.23.2.to_q.bias": { + "min": -0.8793070316314697, + "max": 0.8158283829689026, + "mean": -0.0003010375367011875, + "std": 0.09555298835039139, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.23.2.to_k.weight": { + "min": -0.26992541551589966, + "max": 0.24092742800712585, + "mean": -2.246434632979799e-05, + "std": 0.03895093873143196, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.23.2.to_k.bias": { + "min": -23.743555068969727, + "max": 22.852014541625977, + "mean": -0.09188304841518402, + "std": 4.070625305175781, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.23.2.to_v.weight": { + "min": -0.22777004539966583, + "max": 0.2455480843782425, + "mean": -2.5490313419140875e-05, + "std": 0.03864210844039917, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.23.2.to_v.bias": { + "min": -0.060185808688402176, + "max": 0.04548603296279907, + "mean": -0.00013778329594060779, + "std": 0.014688468538224697, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.23.2.to_out.0.weight": { + "min": -0.33804869651794434, + "max": 0.3748103082180023, + "mean": 7.576927600894123e-06, + "std": 0.04082098975777626, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.23.2.to_out.0.bias": { + "min": -0.046251166611909866, + "max": 0.19543442130088806, + "mean": 0.00027753060567192733, + "std": 0.013553835451602936, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.23.3.g": { + "min": 0.37363529205322266, + "max": 1.1304537057876587, + "mean": 0.8902342319488525, + "std": 0.06401188671588898, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.23.4.ff.0.0.weight": { + "min": -0.44750913977622986, + "max": 0.5426135659217834, + "mean": 2.5048013412742876e-05, + "std": 0.0455806739628315, + "sparsity": 0.0, + "shape": [ + 4096, + 1024 + ] + }, + "transformer.layers.23.4.ff.0.0.bias": { + "min": -0.22384138405323029, + "max": 0.08764129132032394, + "mean": -0.03201291710138321, + "std": 0.03774724155664444, + "sparsity": 0.0, + "shape": [ + 4096 + ] + }, + "transformer.layers.23.4.ff.2.weight": { + "min": -0.7260749936103821, + "max": 0.688654899597168, + "mean": 3.5635155654745176e-05, + "std": 0.051793280988931656, + "sparsity": 0.0, + "shape": [ + 1024, + 4096 + ] + }, + "transformer.layers.23.4.ff.2.bias": { + "min": -0.17447420954704285, + "max": 0.21816052496433258, + "mean": 3.443963942117989e-05, + "std": 0.03176717460155487, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.24.0.weight": { + "min": -0.33968257904052734, + "max": 0.3729552924633026, + "mean": 4.328345676185563e-05, + "std": 0.034136127680540085, + "sparsity": 0.0, + "shape": [ + 1024, + 2048 + ] + }, + "transformer.layers.24.1.g": { + "min": 0.3178211450576782, + "max": 1.2872322797775269, + "mean": 0.6015591025352478, + "std": 0.08348726481199265, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.24.2.to_q.weight": { + "min": -0.28302425146102905, + "max": 0.26023271679878235, + "mean": -2.7253747703070985e-06, + "std": 0.0359804667532444, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.24.2.to_q.bias": { + "min": -0.23563744127750397, + "max": 0.20571035146713257, + "mean": 0.00023820970091037452, + "std": 0.056028686463832855, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.24.2.to_k.weight": { + "min": -0.43542271852493286, + "max": 0.3249562382698059, + "mean": 2.4268334527732804e-05, + "std": 0.034124359488487244, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.24.2.to_k.bias": { + "min": -5.546493053436279, + "max": 7.314059257507324, + "mean": -0.007369840517640114, + "std": 0.6993855834007263, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.24.2.to_v.weight": { + "min": -0.34410950541496277, + "max": 0.36279547214508057, + "mean": 0.0001030894200084731, + "std": 0.04783707857131958, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.24.2.to_v.bias": { + "min": -0.07371430099010468, + "max": 0.060424793511629105, + "mean": 0.0009352926863357425, + "std": 0.01493847742676735, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.24.2.to_out.0.weight": { + "min": -0.2562869191169739, + "max": 0.2867131233215332, + "mean": 4.736550181405619e-06, + "std": 0.04156505689024925, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.24.2.to_out.0.bias": { + "min": -0.0553305447101593, + "max": 0.06281695514917374, + "mean": 0.00012849000631831586, + "std": 0.007162065710872412, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.24.3.g": { + "min": 0.49391981959342957, + "max": 1.220736026763916, + "mean": 1.0135732889175415, + "std": 0.11749263107776642, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.24.4.ff.0.0.weight": { + "min": -1.0939218997955322, + "max": 1.0474658012390137, + "mean": -4.883138171862811e-05, + "std": 0.05241798609495163, + "sparsity": 0.0, + "shape": [ + 4096, + 1024 + ] + }, + "transformer.layers.24.4.ff.0.0.bias": { + "min": -0.223901629447937, + "max": 0.17314252257347107, + "mean": -0.027228916063904762, + "std": 0.03630804270505905, + "sparsity": 0.0, + "shape": [ + 4096 + ] + }, + "transformer.layers.24.4.ff.2.weight": { + "min": -0.8840344548225403, + "max": 0.9224310517311096, + "mean": -0.00014670705422759056, + "std": 0.053297851234674454, + "sparsity": 0.0, + "shape": [ + 1024, + 4096 + ] + }, + "transformer.layers.24.4.ff.2.bias": { + "min": -0.17102152109146118, + "max": 0.3797409236431122, + "mean": 0.003368864767253399, + "std": 0.0398765504360199, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.25.0.weight": { + "min": -0.7776780724525452, + "max": 0.7227001190185547, + "mean": 1.787853761925362e-05, + "std": 0.04615465924143791, + "sparsity": 0.0, + "shape": [ + 1024, + 2048 + ] + }, + "transformer.layers.25.1.g": { + "min": 0.3386647403240204, + "max": 1.4281901121139526, + "mean": 0.9484964609146118, + "std": 0.20680245757102966, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.25.2.to_q.weight": { + "min": -1.745869517326355, + "max": 1.7045400142669678, + "mean": 0.00022709640325047076, + "std": 0.15870508551597595, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.25.2.to_q.bias": { + "min": -1.1994972229003906, + "max": 1.1010137796401978, + "mean": -0.009549295529723167, + "std": 0.20389875769615173, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.25.2.to_k.weight": { + "min": -0.4210166335105896, + "max": 0.4279645085334778, + "mean": 6.39720747130923e-05, + "std": 0.04802015796303749, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.25.2.to_k.bias": { + "min": -19.747936248779297, + "max": 19.543052673339844, + "mean": -0.24834343791007996, + "std": 4.777070999145508, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.25.2.to_v.weight": { + "min": -0.3238843083381653, + "max": 0.4385298192501068, + "mean": -1.1759563676605467e-05, + "std": 0.04616716504096985, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.25.2.to_v.bias": { + "min": -0.03387872874736786, + "max": 0.036932073533535004, + "mean": 0.0006410478381440043, + "std": 0.01291597355157137, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.25.2.to_out.0.weight": { + "min": -0.7035592198371887, + "max": 0.6685189604759216, + "mean": 4.281650763005018e-05, + "std": 0.05789238214492798, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.25.2.to_out.0.bias": { + "min": -0.07232622057199478, + "max": 0.06769084185361862, + "mean": -0.00013414367276709527, + "std": 0.012906934134662151, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.25.3.g": { + "min": 0.3805098831653595, + "max": 1.3928314447402954, + "mean": 1.0667389631271362, + "std": 0.21977593004703522, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.25.4.ff.0.0.weight": { + "min": -0.6165266633033752, + "max": 0.7183749079704285, + "mean": 0.00011245780478930101, + "std": 0.05802787095308304, + "sparsity": 0.0, + "shape": [ + 4096, + 1024 + ] + }, + "transformer.layers.25.4.ff.0.0.bias": { + "min": -0.21882832050323486, + "max": 0.2250150591135025, + "mean": 0.006199384108185768, + "std": 0.049713458865880966, + "sparsity": 0.0, + "shape": [ + 4096 + ] + }, + "transformer.layers.25.4.ff.2.weight": { + "min": -0.6297744512557983, + "max": 0.8895941972732544, + "mean": 1.2031738151563331e-05, + "std": 0.023544643074274063, + "sparsity": 0.0, + "shape": [ + 1024, + 4096 + ] + }, + "transformer.layers.25.4.ff.2.bias": { + "min": -0.506857693195343, + "max": 0.47375017404556274, + "mean": -0.003018573159351945, + "std": 0.06925369799137115, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.norm_out.g": { + "min": 0.5381409525871277, + "max": 1.1801701784133911, + "mean": 0.7828266620635986, + "std": 0.09875727444887161, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.proj_out.weight": { + "min": -0.2670648992061615, + "max": 0.21295404434204102, + "mean": -0.0002240903995698318, + "std": 0.054007235914468765, + "sparsity": 0.0, + "shape": [ + 100, + 1024 + ] + }, + "transformer.proj_out.bias": { + "min": -0.23832593858242035, + "max": 0.014832383021712303, + "mean": -0.043932899832725525, + "std": 0.03429204970598221, + "sparsity": 0.0, + "shape": [ + 100 + ] + } + } +} \ No newline at end of file