diff --git a/.gitattributes b/.gitattributes index a6344aac8c09253b3b630fb776ae94478aa0275b..bf4fa814b53fa15f0dd66b62ae43dc2d1c14de97 100644 --- a/.gitattributes +++ b/.gitattributes @@ -33,3 +33,74 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text *.zip filter=lfs diff=lfs merge=lfs -text *.zst filter=lfs diff=lfs merge=lfs -text *tfevents* filter=lfs diff=lfs merge=lfs -text +new-grid/k16_N1024/plots/ablation_conditional_accuracy.png filter=lfs diff=lfs merge=lfs -text +new-grid/k16_N1024/plots/ablation_per_position.png filter=lfs diff=lfs merge=lfs -text +new-grid/k16_N128/plots/ablation_conditional_accuracy.png filter=lfs diff=lfs merge=lfs -text +new-grid/k16_N128/plots/ablation_per_position.png filter=lfs diff=lfs merge=lfs -text +new-grid/k16_N256/plots/ablation_conditional_accuracy.png filter=lfs diff=lfs merge=lfs -text +new-grid/k16_N256/plots/ablation_per_position.png filter=lfs diff=lfs merge=lfs -text +new-grid/k16_N256/plots/length_generalization.png filter=lfs diff=lfs merge=lfs -text +new-grid/k16_N512/plots/ablation_conditional_accuracy.png filter=lfs diff=lfs merge=lfs -text +new-grid/k16_N512/plots/ablation_per_position.png filter=lfs diff=lfs merge=lfs -text +new-grid/k32_N1024/plots/ablation_conditional_accuracy.png filter=lfs diff=lfs merge=lfs -text +new-grid/k32_N1024/plots/ablation_per_position.png filter=lfs diff=lfs merge=lfs -text +new-grid/k32_N128/plots/ablation_conditional_accuracy.png filter=lfs diff=lfs merge=lfs -text +new-grid/k32_N128/plots/ablation_per_position.png filter=lfs diff=lfs merge=lfs -text +new-grid/k32_N128/plots/length_generalization.png filter=lfs diff=lfs merge=lfs -text +new-grid/k32_N256/plots/ablation_conditional_accuracy.png filter=lfs diff=lfs merge=lfs -text +new-grid/k32_N256/plots/ablation_per_position.png filter=lfs diff=lfs merge=lfs -text +new-grid/k32_N256/plots/consecutive_attention.png filter=lfs diff=lfs merge=lfs -text +new-grid/k32_N256/plots/qk_cross_local.png filter=lfs diff=lfs merge=lfs -text +new-grid/k32_N256/plots/qk_cross_mean.png filter=lfs diff=lfs merge=lfs -text +new-grid/k32_N256/plots/qk_cross_overlay.png filter=lfs diff=lfs merge=lfs -text +new-grid/k32_N256/plots/qk_heatmap.png filter=lfs diff=lfs merge=lfs -text +new-grid/k32_N256/plots/qk_local_instances.png filter=lfs diff=lfs merge=lfs -text +new-grid/k32_N256/plots/qk_local_smooth.png filter=lfs diff=lfs merge=lfs -text +new-grid/k32_N256/plots/qk_monotonicity_summary.png filter=lfs diff=lfs merge=lfs -text +new-grid/k32_N256/plots/qk_neighbor_cross.png filter=lfs diff=lfs merge=lfs -text +new-grid/k32_N256/plots/qk_raw_structure.png filter=lfs diff=lfs merge=lfs -text +new-grid/k32_N512/plots/ablation_conditional_accuracy.png filter=lfs diff=lfs merge=lfs -text +new-grid/k32_N512/plots/ablation_per_position.png filter=lfs diff=lfs merge=lfs -text +new-grid/k32_N512/plots/attn1_importance_analysis.png filter=lfs diff=lfs merge=lfs -text +new-grid/k32_N512/plots/attn_spread.png filter=lfs diff=lfs merge=lfs -text +new-grid/k32_N512/plots/attn_value_distance.png filter=lfs diff=lfs merge=lfs -text +new-grid/k32_N512/plots/consecutive_attention.png filter=lfs diff=lfs merge=lfs -text +new-grid/k32_N512/plots/consecutive_attention_grid.png filter=lfs diff=lfs merge=lfs -text +new-grid/k32_N512/plots/consecutive_attention_grid_ckpt60k.png filter=lfs diff=lfs merge=lfs -text +new-grid/k32_N512/plots/intervened_consecutive.png filter=lfs diff=lfs merge=lfs -text +new-grid/k32_N512/plots/intervened_consecutive_100k.png filter=lfs diff=lfs merge=lfs -text +new-grid/k32_N512/plots/intervened_consecutive_int5.png filter=lfs diff=lfs merge=lfs -text +new-grid/k32_N512/plots/l1_qk_interaction.png filter=lfs diff=lfs merge=lfs -text +new-grid/k32_N512/plots/l1_vs_l2_qk_comparison.png filter=lfs diff=lfs merge=lfs -text +new-grid/k32_N512/plots/length_generalization.png filter=lfs diff=lfs merge=lfs -text +new-grid/k32_N512/plots/qk_cross_local.png filter=lfs diff=lfs merge=lfs -text +new-grid/k32_N512/plots/qk_cross_mean.png filter=lfs diff=lfs merge=lfs -text +new-grid/k32_N512/plots/qk_cross_overlay.png filter=lfs diff=lfs merge=lfs -text +new-grid/k32_N512/plots/qk_cross_with_pos.png filter=lfs diff=lfs merge=lfs -text +new-grid/k32_N512/plots/qk_deep_decomp.png filter=lfs diff=lfs merge=lfs -text +new-grid/k32_N512/plots/qk_fixed_query.png filter=lfs diff=lfs merge=lfs -text +new-grid/k32_N512/plots/qk_full_value_path.png filter=lfs diff=lfs merge=lfs -text +new-grid/k32_N512/plots/qk_heatmap.png filter=lfs diff=lfs merge=lfs -text +new-grid/k32_N512/plots/qk_heatmap_ty.png filter=lfs diff=lfs merge=lfs -text +new-grid/k32_N512/plots/qk_heatmap_xt.png filter=lfs diff=lfs merge=lfs -text +new-grid/k32_N512/plots/qk_heatmap_xt_60k.png filter=lfs diff=lfs merge=lfs -text +new-grid/k32_N512/plots/qk_heatmap_xt_maxscore.png filter=lfs diff=lfs merge=lfs -text +new-grid/k32_N512/plots/qk_heatmap_xt_slices.png filter=lfs diff=lfs merge=lfs -text +new-grid/k32_N512/plots/qk_heatmap_xt_slices2.png filter=lfs diff=lfs merge=lfs -text +new-grid/k32_N512/plots/qk_heatmap_xt_slices3.png filter=lfs diff=lfs merge=lfs -text +new-grid/k32_N512/plots/qk_heatmap_xt_split.png filter=lfs diff=lfs merge=lfs -text +new-grid/k32_N512/plots/qk_heatmap_xt_split_slices.png filter=lfs diff=lfs merge=lfs -text +new-grid/k32_N512/plots/qk_heatmap_zy.png filter=lfs diff=lfs merge=lfs -text +new-grid/k32_N512/plots/qk_interaction_decomp.png filter=lfs diff=lfs merge=lfs -text +new-grid/k32_N512/plots/qk_local_instances.png filter=lfs diff=lfs merge=lfs -text +new-grid/k32_N512/plots/qk_local_smooth.png filter=lfs diff=lfs merge=lfs -text +new-grid/k32_N512/plots/qk_monotonicity_summary.png filter=lfs diff=lfs merge=lfs -text +new-grid/k32_N512/plots/qk_neighbor_cross.png filter=lfs diff=lfs merge=lfs -text +new-grid/k32_N512/plots/qk_neighbor_score.png filter=lfs diff=lfs merge=lfs -text +new-grid/k32_N512/plots/qk_query_side_decomp.png filter=lfs diff=lfs merge=lfs -text +new-grid/k32_N512/plots/qk_raw_structure.png filter=lfs diff=lfs merge=lfs -text +new-grid/k32_N512/plots/qk_residual_path.png filter=lfs diff=lfs merge=lfs -text +new-grid/k32_N512/plots/qk_self_score.png filter=lfs diff=lfs merge=lfs -text +new-grid/k32_N512/plots/qk_slope_profiles.png filter=lfs diff=lfs merge=lfs -text +new-grid/k32_N512/plots/qk_value_heatmap.png filter=lfs diff=lfs merge=lfs -text +new-grid/k32_N512/plots/qk_value_heatmap_base.png filter=lfs diff=lfs merge=lfs -text diff --git a/new-grid/k16_N1024/checkpoints/std0p01_iseed1__ckpt10000.pt b/new-grid/k16_N1024/checkpoints/std0p01_iseed1__ckpt10000.pt new file mode 100644 index 0000000000000000000000000000000000000000..b42409828085d2faa6efac21dd743ced308f70a2 --- /dev/null +++ b/new-grid/k16_N1024/checkpoints/std0p01_iseed1__ckpt10000.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8474bcdfa9e03438efc5c9b7a0e05364e02f0329f50b5f82702604068c9b4ea8 +size 918914 diff --git a/new-grid/k16_N1024/checkpoints/std0p01_iseed1__ckpt100000.pt b/new-grid/k16_N1024/checkpoints/std0p01_iseed1__ckpt100000.pt new file mode 100644 index 0000000000000000000000000000000000000000..90cb53206e9f5f096e67d8206680fcb71f86b1f4 --- /dev/null +++ b/new-grid/k16_N1024/checkpoints/std0p01_iseed1__ckpt100000.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e6abd9600b9206c545cb471615315234567db7d4f26037f919854d0ea8108bfa +size 918949 diff --git a/new-grid/k16_N1024/checkpoints/std0p01_iseed1__ckpt15000.pt b/new-grid/k16_N1024/checkpoints/std0p01_iseed1__ckpt15000.pt new file mode 100644 index 0000000000000000000000000000000000000000..93c9543f0b0f27b0e8fc6199c1fd71e52c1ded40 --- /dev/null +++ b/new-grid/k16_N1024/checkpoints/std0p01_iseed1__ckpt15000.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:504160082b9131e129fb3ce5deb9143ea8f17ce79819def70676b232248b48e4 +size 918914 diff --git a/new-grid/k16_N1024/checkpoints/std0p01_iseed1__ckpt20000.pt b/new-grid/k16_N1024/checkpoints/std0p01_iseed1__ckpt20000.pt new file mode 100644 index 0000000000000000000000000000000000000000..9e0522dc9b72af61ddb50d1d5ea4266de45fcb64 --- /dev/null +++ b/new-grid/k16_N1024/checkpoints/std0p01_iseed1__ckpt20000.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:629cfb28178938761ba5cf23938837c135d77df00151b4ff2235191113f37aed +size 918914 diff --git a/new-grid/k16_N1024/checkpoints/std0p01_iseed1__ckpt25000.pt b/new-grid/k16_N1024/checkpoints/std0p01_iseed1__ckpt25000.pt new file mode 100644 index 0000000000000000000000000000000000000000..bc013de0144d5e4d702adc8fb6e9e1ee3241c2f0 --- /dev/null +++ b/new-grid/k16_N1024/checkpoints/std0p01_iseed1__ckpt25000.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:41f72b2fd86ad3acdb890ba4e8d14986c9caecaf35d275c0a01524794fe3fdbd +size 918914 diff --git a/new-grid/k16_N1024/checkpoints/std0p01_iseed1__ckpt30000.pt b/new-grid/k16_N1024/checkpoints/std0p01_iseed1__ckpt30000.pt new file mode 100644 index 0000000000000000000000000000000000000000..6688131a2523f2ad638d1f49900b900e06a9358a --- /dev/null +++ b/new-grid/k16_N1024/checkpoints/std0p01_iseed1__ckpt30000.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f31f90f535fc57b7206b9fda66281de3ec57908bf3c42198d5f1610ef57df14b +size 918914 diff --git a/new-grid/k16_N1024/checkpoints/std0p01_iseed1__ckpt35000.pt b/new-grid/k16_N1024/checkpoints/std0p01_iseed1__ckpt35000.pt new file mode 100644 index 0000000000000000000000000000000000000000..fbcb0e642fbdc4f72e84aafa10c9f2bac3aa2ddd --- /dev/null +++ b/new-grid/k16_N1024/checkpoints/std0p01_iseed1__ckpt35000.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4b4af5a7087999a895b7b10a6dfde75f9ffeaaf11f6ac9edee6aa571cb0cc8e6 +size 918914 diff --git a/new-grid/k16_N1024/checkpoints/std0p01_iseed1__ckpt40000.pt b/new-grid/k16_N1024/checkpoints/std0p01_iseed1__ckpt40000.pt new file mode 100644 index 0000000000000000000000000000000000000000..7cbdcf2a315cd3d09c66156996794af6425cf2a0 --- /dev/null +++ b/new-grid/k16_N1024/checkpoints/std0p01_iseed1__ckpt40000.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4968709f49088450d1eba71404bbc95d253b84b75119fcba4bcce6937fc4ed75 +size 918914 diff --git a/new-grid/k16_N1024/checkpoints/std0p01_iseed1__ckpt45000.pt b/new-grid/k16_N1024/checkpoints/std0p01_iseed1__ckpt45000.pt new file mode 100644 index 0000000000000000000000000000000000000000..bf8f2c157939fd5cb4ca98215d469a694cf5bd9c --- /dev/null +++ b/new-grid/k16_N1024/checkpoints/std0p01_iseed1__ckpt45000.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bb1aa86329960e33707032c36e750ea346b2d6575f109c11e33dfb3016fb3d8d +size 918914 diff --git a/new-grid/k16_N1024/checkpoints/std0p01_iseed1__ckpt5000.pt b/new-grid/k16_N1024/checkpoints/std0p01_iseed1__ckpt5000.pt new file mode 100644 index 0000000000000000000000000000000000000000..568408af866b73eaece693a9a6e84832d6c5de5b --- /dev/null +++ b/new-grid/k16_N1024/checkpoints/std0p01_iseed1__ckpt5000.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cb50cb1b6c97affc0bd7371ac10a58b3dedd07ccc9eae4f82e8b45ee70ac6957 +size 918879 diff --git a/new-grid/k16_N1024/checkpoints/std0p01_iseed1__ckpt50000.pt b/new-grid/k16_N1024/checkpoints/std0p01_iseed1__ckpt50000.pt new file mode 100644 index 0000000000000000000000000000000000000000..76699d9d17309b212cdb206cb575f3458a2250e6 --- /dev/null +++ b/new-grid/k16_N1024/checkpoints/std0p01_iseed1__ckpt50000.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9fcadea35e7e7bd44d6b7c9adc7542fbc69bc8d89227da1e9454e5eba8743df9 +size 918914 diff --git a/new-grid/k16_N1024/checkpoints/std0p01_iseed1__ckpt55000.pt b/new-grid/k16_N1024/checkpoints/std0p01_iseed1__ckpt55000.pt new file mode 100644 index 0000000000000000000000000000000000000000..15a82a3e3cd25edf32f9375dec430a8254f6ed68 --- /dev/null +++ b/new-grid/k16_N1024/checkpoints/std0p01_iseed1__ckpt55000.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:703ec62dbd7730a73b45248ee7c503c4409b6132f389532d1ff93f8ec5b98ebb +size 918914 diff --git a/new-grid/k16_N1024/checkpoints/std0p01_iseed1__ckpt60000.pt b/new-grid/k16_N1024/checkpoints/std0p01_iseed1__ckpt60000.pt new file mode 100644 index 0000000000000000000000000000000000000000..d1dbebe7422a97280ebbf348a9b03836c5df3508 --- /dev/null +++ b/new-grid/k16_N1024/checkpoints/std0p01_iseed1__ckpt60000.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fe384383c23b5f0e4068fafe95e58a83b1fe0357a162aa95812fee985b3578c9 +size 918914 diff --git a/new-grid/k16_N1024/checkpoints/std0p01_iseed1__ckpt65000.pt b/new-grid/k16_N1024/checkpoints/std0p01_iseed1__ckpt65000.pt new file mode 100644 index 0000000000000000000000000000000000000000..dbc8b18916a27d18ccd8aef895a472a218f7e727 --- /dev/null +++ b/new-grid/k16_N1024/checkpoints/std0p01_iseed1__ckpt65000.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a9f50e5acb3c13da7ab1e65cd4e957ea652aa6a64def830ac799aac318c41356 +size 918914 diff --git a/new-grid/k16_N1024/checkpoints/std0p01_iseed1__ckpt70000.pt b/new-grid/k16_N1024/checkpoints/std0p01_iseed1__ckpt70000.pt new file mode 100644 index 0000000000000000000000000000000000000000..a87d637c2ac7a179c3decd1ff6020392c70d7b3b --- /dev/null +++ b/new-grid/k16_N1024/checkpoints/std0p01_iseed1__ckpt70000.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e2495844b1242b7506ffa52c6c476d25d15b1cd1b69ce2582eee0974b1cfd9f5 +size 918914 diff --git a/new-grid/k16_N1024/checkpoints/std0p01_iseed1__ckpt75000.pt b/new-grid/k16_N1024/checkpoints/std0p01_iseed1__ckpt75000.pt new file mode 100644 index 0000000000000000000000000000000000000000..60ed711835fc41d15e3f8d0442e509b87264702e --- /dev/null +++ b/new-grid/k16_N1024/checkpoints/std0p01_iseed1__ckpt75000.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2930aa155863ac129951258e0b39b2c1f46616fad023e48f8769d3201362ff59 +size 918914 diff --git a/new-grid/k16_N1024/checkpoints/std0p01_iseed1__ckpt80000.pt b/new-grid/k16_N1024/checkpoints/std0p01_iseed1__ckpt80000.pt new file mode 100644 index 0000000000000000000000000000000000000000..b2cff07371322066b6da2b9b398455f28b3fafd7 --- /dev/null +++ b/new-grid/k16_N1024/checkpoints/std0p01_iseed1__ckpt80000.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5af4fb7088213a08bbd637ff3acbefe524f361b4cd69a443748be4a2439624a8 +size 918914 diff --git a/new-grid/k16_N1024/checkpoints/std0p01_iseed1__ckpt85000.pt b/new-grid/k16_N1024/checkpoints/std0p01_iseed1__ckpt85000.pt new file mode 100644 index 0000000000000000000000000000000000000000..7670493d81ca1ec83b64d58acd441c99f38e83ab --- /dev/null +++ b/new-grid/k16_N1024/checkpoints/std0p01_iseed1__ckpt85000.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:01499164b32b25dfffe8272645d3614cdee749df291d6185e5482775e43009c4 +size 918914 diff --git a/new-grid/k16_N1024/checkpoints/std0p01_iseed1__ckpt90000.pt b/new-grid/k16_N1024/checkpoints/std0p01_iseed1__ckpt90000.pt new file mode 100644 index 0000000000000000000000000000000000000000..9ea0516affe239ba618b691c20710c3af74b6ea5 --- /dev/null +++ b/new-grid/k16_N1024/checkpoints/std0p01_iseed1__ckpt90000.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:73164455b50012ca76976e8c9727c9f03487cae4755b9411f4bafe0b3b22dd2b +size 918914 diff --git a/new-grid/k16_N1024/checkpoints/std0p01_iseed1__ckpt95000.pt b/new-grid/k16_N1024/checkpoints/std0p01_iseed1__ckpt95000.pt new file mode 100644 index 0000000000000000000000000000000000000000..da8c55a08cbd49f8961cb6265436b03a3c8f3469 --- /dev/null +++ b/new-grid/k16_N1024/checkpoints/std0p01_iseed1__ckpt95000.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b16e97b823340e10d6dd687757155bb3ac685ee677ad525216196d25c6f3ccbc +size 918914 diff --git a/new-grid/k16_N1024/logs/plot.log b/new-grid/k16_N1024/logs/plot.log new file mode 100644 index 0000000000000000000000000000000000000000..fbe14a5b8229116499738e269ebc488dd464fb9b --- /dev/null +++ b/new-grid/k16_N1024/logs/plot.log @@ -0,0 +1,94 @@ +Loading checkpoint: /mnt/task_runtime/new-grid/k16_N1024/checkpoints/std0p01_iseed1__ckpt100000.pt + block_size=16, vocab_n=1024, device=cuda +Computing baseline... + full_seq_acc=1.0000 + Saved baseline plots +Computing ablation... + skip_layer=0: full_seq_acc=0.0000 + skip_layer=1: full_seq_acc=0.0000 + Saved ablation plots +Computing cinclogits... + Layer 0: done + Layer 1: done + Saved cinclogits plots +Computing intensity layer=0 ub=5... + WARNING: intensity=-0.50 got 3/200 valid after 2000 rounds + WARNING: intensity=-0.25 got 3/200 valid after 2000 rounds + WARNING: intensity=0.00 got 6/200 valid after 2000 rounds + WARNING: intensity=0.25 got 3/200 valid after 2000 rounds + WARNING: intensity=0.50 got 2/200 valid after 2000 rounds + WARNING: intensity=0.75 got 3/200 valid after 2000 rounds + WARNING: intensity=1.00 got 9/200 valid after 2000 rounds + WARNING: intensity=1.25 got 5/200 valid after 2000 rounds + WARNING: intensity=1.50 got 5/200 valid after 2000 rounds + WARNING: intensity=1.75 got 6/200 valid after 2000 rounds + WARNING: intensity=2.00 got 8/200 valid after 2000 rounds + Done +Computing intensity layer=0 ub=10... + WARNING: intensity=-0.50 got 19/200 valid after 2000 rounds + WARNING: intensity=-0.25 got 28/200 valid after 2000 rounds + WARNING: intensity=0.00 got 16/200 valid after 2000 rounds + WARNING: intensity=0.25 got 15/200 valid after 2000 rounds + WARNING: intensity=0.50 got 21/200 valid after 2000 rounds + WARNING: intensity=0.75 got 18/200 valid after 2000 rounds + WARNING: intensity=1.00 got 20/200 valid after 2000 rounds + WARNING: intensity=1.25 got 19/200 valid after 2000 rounds + WARNING: intensity=1.50 got 13/200 valid after 2000 rounds + WARNING: intensity=1.75 got 21/200 valid after 2000 rounds + WARNING: intensity=2.00 got 18/200 valid after 2000 rounds + Done +Computing intensity layer=0 ub=15... + WARNING: intensity=-0.50 got 49/200 valid after 2000 rounds + WARNING: intensity=-0.25 got 53/200 valid after 2000 rounds + WARNING: intensity=0.00 got 41/200 valid after 2000 rounds + WARNING: intensity=0.25 got 48/200 valid after 2000 rounds + WARNING: intensity=0.50 got 43/200 valid after 2000 rounds + WARNING: intensity=0.75 got 38/200 valid after 2000 rounds + WARNING: intensity=1.00 got 42/200 valid after 2000 rounds + WARNING: intensity=1.25 got 49/200 valid after 2000 rounds + WARNING: intensity=1.50 got 46/200 valid after 2000 rounds + WARNING: intensity=1.75 got 40/200 valid after 2000 rounds + WARNING: intensity=2.00 got 34/200 valid after 2000 rounds + Done +Computing intensity layer=1 ub=5... + WARNING: intensity=-0.50 got 4/200 valid after 2000 rounds + WARNING: intensity=-0.25 got 1/200 valid after 2000 rounds + WARNING: intensity=0.00 got 3/200 valid after 2000 rounds + WARNING: intensity=0.25 got 4/200 valid after 2000 rounds + WARNING: intensity=0.50 got 4/200 valid after 2000 rounds + WARNING: intensity=0.75 got 4/200 valid after 2000 rounds + WARNING: intensity=1.00 got 5/200 valid after 2000 rounds + WARNING: intensity=1.25 got 5/200 valid after 2000 rounds + WARNING: intensity=1.50 got 6/200 valid after 2000 rounds + WARNING: intensity=1.75 got 1/200 valid after 2000 rounds + WARNING: intensity=2.00 got 7/200 valid after 2000 rounds + Done +Computing intensity layer=1 ub=10... + WARNING: intensity=-0.50 got 26/200 valid after 2000 rounds + WARNING: intensity=-0.25 got 20/200 valid after 2000 rounds + WARNING: intensity=0.00 got 16/200 valid after 2000 rounds + WARNING: intensity=0.25 got 15/200 valid after 2000 rounds + WARNING: intensity=0.50 got 26/200 valid after 2000 rounds + WARNING: intensity=0.75 got 10/200 valid after 2000 rounds + WARNING: intensity=1.00 got 19/200 valid after 2000 rounds + WARNING: intensity=1.25 got 15/200 valid after 2000 rounds + WARNING: intensity=1.50 got 19/200 valid after 2000 rounds + WARNING: intensity=1.75 got 25/200 valid after 2000 rounds + WARNING: intensity=2.00 got 13/200 valid after 2000 rounds + Done +Computing intensity layer=1 ub=15... + WARNING: intensity=-0.50 got 35/200 valid after 2000 rounds + WARNING: intensity=-0.25 got 40/200 valid after 2000 rounds + WARNING: intensity=0.00 got 43/200 valid after 2000 rounds + WARNING: intensity=0.25 got 48/200 valid after 2000 rounds + WARNING: intensity=0.50 got 41/200 valid after 2000 rounds + WARNING: intensity=0.75 got 40/200 valid after 2000 rounds + WARNING: intensity=1.00 got 49/200 valid after 2000 rounds + WARNING: intensity=1.25 got 41/200 valid after 2000 rounds + WARNING: intensity=1.50 got 50/200 valid after 2000 rounds + WARNING: intensity=1.75 got 42/200 valid after 2000 rounds + WARNING: intensity=2.00 got 33/200 valid after 2000 rounds + Done + Saved intensity plots + +All plots saved to /mnt/task_runtime/new-grid/k16_N1024/plots diff --git a/new-grid/k16_N1024/logs/train.log b/new-grid/k16_N1024/logs/train.log new file mode 100644 index 0000000000000000000000000000000000000000..e0e064c5ed7f5e142647566a259de66ad5cc899c --- /dev/null +++ b/new-grid/k16_N1024/logs/train.log @@ -0,0 +1,128 @@ +=== Training std0p01_iseed1 on GPU 3 for 100000 iters === + DEVICE=cuda, AMP_DTYPE=torch.bfloat16 + init_std=0.01, lr=0.03, batch_size=4096 + k=16, N=1024, E=64, L=2 + train std0p01_iseed1: 0%| | 0/100000 [00:00