diff --git a/.gitattributes b/.gitattributes index 7d6a1d8b7e27922da8b85c69290a99be4ab47a3a..a93f0b8132b2cb2e6bb634ac2e19116083bf3032 100644 --- a/.gitattributes +++ b/.gitattributes @@ -313,3 +313,604 @@ output/lora_128_dim_384_A100/step_vis/val_1_step_000000_sample01.png filter=lfs output/lora_128_dim_384_A100/step_vis/val_1_step_000000_sample02.png filter=lfs diff=lfs merge=lfs -text output/lora_128_dim_384_A100/step_vis/val_1_step_000000_sample03.png filter=lfs diff=lfs merge=lfs -text output/lora_128_dim_384_A100/step_vis/val_1_step_000000_sample04.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/inference_samples/JAX_Tile_018_007_4_seed0_steps28.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_0_step_002000.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_0_step_002000_sample00.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_0_step_002000_sample01.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_0_step_002000_sample02.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_0_step_002000_sample03.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_0_step_002000_sample04.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_0_step_004000.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_0_step_004000_sample00.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_0_step_004000_sample01.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_0_step_004000_sample02.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_0_step_004000_sample03.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_0_step_004000_sample04.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_0_step_006000.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_0_step_006000_sample00.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_0_step_006000_sample01.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_0_step_006000_sample02.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_0_step_006000_sample03.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_0_step_006000_sample04.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_0_step_008000.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_0_step_008000_sample00.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_0_step_008000_sample01.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_0_step_008000_sample02.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_0_step_008000_sample03.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_0_step_008000_sample04.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_0_step_010000.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_0_step_010000_sample00.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_0_step_010000_sample01.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_0_step_010000_sample02.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_0_step_010000_sample03.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_0_step_010000_sample04.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_0_step_011999.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_0_step_011999_sample00.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_0_step_011999_sample01.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_0_step_011999_sample02.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_0_step_011999_sample03.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_0_step_011999_sample04.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_0_step_013999.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_0_step_013999_sample00.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_0_step_013999_sample01.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_0_step_013999_sample02.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_0_step_013999_sample03.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_0_step_013999_sample04.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_0_step_015999.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_0_step_015999_sample00.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_0_step_015999_sample01.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_0_step_015999_sample02.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_0_step_015999_sample03.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_0_step_015999_sample04.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_0_step_017999.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_0_step_017999_sample00.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_0_step_017999_sample01.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_0_step_017999_sample02.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_0_step_017999_sample03.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_0_step_017999_sample04.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_0_step_019999.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_0_step_019999_sample00.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_0_step_019999_sample01.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_0_step_019999_sample02.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_0_step_019999_sample03.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_0_step_019999_sample04.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_1_step_002000.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_1_step_002000_sample00.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_1_step_002000_sample01.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_1_step_002000_sample02.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_1_step_002000_sample03.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_1_step_002000_sample04.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_1_step_004000.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_1_step_004000_sample00.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_1_step_004000_sample01.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_1_step_004000_sample02.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_1_step_004000_sample03.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_1_step_004000_sample04.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_1_step_006000.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_1_step_006000_sample00.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_1_step_006000_sample01.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_1_step_006000_sample02.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_1_step_006000_sample03.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_1_step_006000_sample04.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_1_step_008000.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_1_step_008000_sample00.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_1_step_008000_sample01.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_1_step_008000_sample02.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_1_step_008000_sample03.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_1_step_008000_sample04.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_1_step_010000.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_1_step_010000_sample00.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_1_step_010000_sample01.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_1_step_010000_sample02.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_1_step_010000_sample03.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_1_step_010000_sample04.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_1_step_011999.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_1_step_011999_sample00.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_1_step_011999_sample01.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_1_step_011999_sample02.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_1_step_011999_sample03.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_1_step_011999_sample04.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_1_step_013999.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_1_step_013999_sample00.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_1_step_013999_sample01.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_1_step_013999_sample02.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_1_step_013999_sample03.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_1_step_013999_sample04.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_1_step_015999.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_1_step_015999_sample00.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_1_step_015999_sample01.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_1_step_015999_sample02.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_1_step_015999_sample03.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_1_step_015999_sample04.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_1_step_017999.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_1_step_017999_sample00.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_1_step_017999_sample01.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_1_step_017999_sample02.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_1_step_017999_sample03.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_1_step_017999_sample04.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_1_step_019999.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_1_step_019999_sample00.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_1_step_019999_sample01.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_1_step_019999_sample02.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_1_step_019999_sample03.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_1_step_019999_sample04.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_2_step_002000.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_2_step_002000_sample00.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_2_step_002000_sample01.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_2_step_002000_sample02.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_2_step_002000_sample03.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_2_step_002000_sample04.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_2_step_004000.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_2_step_004000_sample00.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_2_step_004000_sample01.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_2_step_004000_sample02.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_2_step_004000_sample03.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_2_step_004000_sample04.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_2_step_006000.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_2_step_006000_sample00.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_2_step_006000_sample01.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_2_step_006000_sample02.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_2_step_006000_sample03.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_2_step_006000_sample04.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_2_step_008000.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_2_step_008000_sample00.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_2_step_008000_sample01.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_2_step_008000_sample02.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_2_step_008000_sample03.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_2_step_008000_sample04.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_2_step_010000.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_2_step_010000_sample00.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_2_step_010000_sample01.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_2_step_010000_sample02.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_2_step_010000_sample03.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_2_step_010000_sample04.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_2_step_011999.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_2_step_011999_sample00.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_2_step_011999_sample01.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_2_step_011999_sample02.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_2_step_011999_sample03.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_2_step_011999_sample04.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_2_step_013999.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_2_step_013999_sample00.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_2_step_013999_sample01.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_2_step_013999_sample02.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_2_step_013999_sample03.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_2_step_013999_sample04.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_2_step_015999.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_2_step_015999_sample00.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_2_step_015999_sample01.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_2_step_015999_sample02.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_2_step_015999_sample03.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_2_step_015999_sample04.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_2_step_017999.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_2_step_017999_sample00.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_2_step_017999_sample01.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_2_step_017999_sample02.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_2_step_017999_sample03.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_2_step_017999_sample04.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_2_step_019999.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_2_step_019999_sample00.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_2_step_019999_sample01.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_2_step_019999_sample02.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_2_step_019999_sample03.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_2_step_019999_sample04.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_3_step_002000.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_3_step_002000_sample00.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_3_step_002000_sample01.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_3_step_002000_sample02.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_3_step_002000_sample03.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_3_step_002000_sample04.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_3_step_004000.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_3_step_004000_sample00.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_3_step_004000_sample01.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_3_step_004000_sample02.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_3_step_004000_sample03.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_3_step_004000_sample04.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_3_step_006000.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_3_step_006000_sample00.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_3_step_006000_sample01.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_3_step_006000_sample02.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_3_step_006000_sample03.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_3_step_006000_sample04.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_3_step_008000.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_3_step_008000_sample00.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_3_step_008000_sample01.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_3_step_008000_sample02.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_3_step_008000_sample03.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_3_step_008000_sample04.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_3_step_010000.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_3_step_010000_sample00.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_3_step_010000_sample01.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_3_step_010000_sample02.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_3_step_010000_sample03.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_3_step_010000_sample04.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_3_step_011999.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_3_step_011999_sample00.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_3_step_011999_sample01.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_3_step_011999_sample02.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_3_step_011999_sample03.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_3_step_011999_sample04.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_3_step_013999.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_3_step_013999_sample00.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_3_step_013999_sample01.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_3_step_013999_sample02.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_3_step_013999_sample03.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_3_step_013999_sample04.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_3_step_015999.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_3_step_015999_sample00.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_3_step_015999_sample01.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_3_step_015999_sample02.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_3_step_015999_sample03.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_3_step_015999_sample04.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_3_step_017999.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_3_step_017999_sample00.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_3_step_017999_sample01.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_3_step_017999_sample02.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_3_step_017999_sample03.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_3_step_017999_sample04.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_3_step_019999.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_3_step_019999_sample00.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_3_step_019999_sample01.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_3_step_019999_sample02.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_3_step_019999_sample03.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_3_step_019999_sample04.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_4_step_002000.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_4_step_002000_sample00.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_4_step_002000_sample01.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_4_step_002000_sample02.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_4_step_002000_sample03.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_4_step_002000_sample04.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_4_step_004000.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_4_step_004000_sample00.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_4_step_004000_sample01.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_4_step_004000_sample02.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_4_step_004000_sample03.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_4_step_004000_sample04.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_4_step_006000.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_4_step_006000_sample00.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_4_step_006000_sample01.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_4_step_006000_sample02.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_4_step_006000_sample03.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_4_step_006000_sample04.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_4_step_008000.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_4_step_008000_sample00.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_4_step_008000_sample01.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_4_step_008000_sample02.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_4_step_008000_sample03.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_4_step_008000_sample04.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_4_step_010000.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_4_step_010000_sample00.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_4_step_010000_sample01.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_4_step_010000_sample02.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_4_step_010000_sample03.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_4_step_010000_sample04.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_4_step_011999.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_4_step_011999_sample00.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_4_step_011999_sample01.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_4_step_011999_sample02.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_4_step_011999_sample03.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_4_step_011999_sample04.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_4_step_013999.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_4_step_013999_sample00.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_4_step_013999_sample01.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_4_step_013999_sample02.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_4_step_013999_sample03.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_4_step_013999_sample04.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_4_step_015999.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_4_step_015999_sample00.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_4_step_015999_sample01.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_4_step_015999_sample02.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_4_step_015999_sample03.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_4_step_015999_sample04.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_4_step_017999.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_4_step_017999_sample00.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_4_step_017999_sample01.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_4_step_017999_sample02.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_4_step_017999_sample03.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_4_step_017999_sample04.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_4_step_019999.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_4_step_019999_sample00.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_4_step_019999_sample01.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_4_step_019999_sample02.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_4_step_019999_sample03.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_4_step_019999_sample04.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_5_step_002000.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_5_step_002000_sample00.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_5_step_002000_sample01.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_5_step_002000_sample02.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_5_step_002000_sample03.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_5_step_002000_sample04.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_5_step_004000.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_5_step_004000_sample00.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_5_step_004000_sample01.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_5_step_004000_sample02.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_5_step_004000_sample03.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_5_step_004000_sample04.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_5_step_006000.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_5_step_006000_sample00.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_5_step_006000_sample01.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_5_step_006000_sample02.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_5_step_006000_sample03.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_5_step_006000_sample04.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_5_step_008000.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_5_step_008000_sample00.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_5_step_008000_sample01.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_5_step_008000_sample02.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_5_step_008000_sample03.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_5_step_008000_sample04.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_5_step_010000.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_5_step_010000_sample00.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_5_step_010000_sample01.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_5_step_010000_sample02.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_5_step_010000_sample03.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_5_step_010000_sample04.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_5_step_011999.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_5_step_011999_sample00.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_5_step_011999_sample01.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_5_step_011999_sample02.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_5_step_011999_sample03.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_5_step_011999_sample04.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_5_step_013999.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_5_step_013999_sample00.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_5_step_013999_sample01.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_5_step_013999_sample02.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_5_step_013999_sample03.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_5_step_013999_sample04.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_5_step_015999.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_5_step_015999_sample00.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_5_step_015999_sample01.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_5_step_015999_sample02.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_5_step_015999_sample03.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_5_step_015999_sample04.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_5_step_017999.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_5_step_017999_sample00.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_5_step_017999_sample01.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_5_step_017999_sample02.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_5_step_017999_sample03.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_5_step_017999_sample04.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_5_step_019999.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_5_step_019999_sample00.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_5_step_019999_sample01.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_5_step_019999_sample02.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_5_step_019999_sample03.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/test_5_step_019999_sample04.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/train_0_step_002000.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/train_0_step_002000_sample00.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/train_0_step_002000_sample01.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/train_0_step_002000_sample02.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/train_0_step_002000_sample03.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/train_0_step_002000_sample04.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/train_0_step_004000.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/train_0_step_004000_sample00.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/train_0_step_004000_sample01.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/train_0_step_004000_sample02.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/train_0_step_004000_sample03.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/train_0_step_004000_sample04.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/train_0_step_006000.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/train_0_step_006000_sample00.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/train_0_step_006000_sample01.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/train_0_step_006000_sample02.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/train_0_step_006000_sample03.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/train_0_step_006000_sample04.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/train_0_step_008000.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/train_0_step_008000_sample00.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/train_0_step_008000_sample01.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/train_0_step_008000_sample02.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/train_0_step_008000_sample03.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/train_0_step_008000_sample04.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/train_0_step_010000.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/train_0_step_010000_sample00.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/train_0_step_010000_sample01.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/train_0_step_010000_sample02.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/train_0_step_010000_sample03.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/train_0_step_010000_sample04.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/train_0_step_011999.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/train_0_step_011999_sample00.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/train_0_step_011999_sample01.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/train_0_step_011999_sample02.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/train_0_step_011999_sample03.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/train_0_step_011999_sample04.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/train_0_step_013999.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/train_0_step_013999_sample00.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/train_0_step_013999_sample01.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/train_0_step_013999_sample02.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/train_0_step_013999_sample03.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/train_0_step_013999_sample04.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/train_0_step_015999.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/train_0_step_015999_sample00.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/train_0_step_015999_sample01.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/train_0_step_015999_sample02.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/train_0_step_015999_sample03.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/train_0_step_015999_sample04.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/train_0_step_017999.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/train_0_step_017999_sample00.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/train_0_step_017999_sample01.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/train_0_step_017999_sample02.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/train_0_step_017999_sample03.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/train_0_step_017999_sample04.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/train_0_step_019999.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/train_0_step_019999_sample00.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/train_0_step_019999_sample01.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/train_0_step_019999_sample02.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/train_0_step_019999_sample03.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/train_0_step_019999_sample04.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/train_1_step_002000.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/train_1_step_002000_sample00.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/train_1_step_002000_sample01.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/train_1_step_002000_sample02.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/train_1_step_002000_sample03.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/train_1_step_002000_sample04.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/train_1_step_004000.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/train_1_step_004000_sample00.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/train_1_step_004000_sample01.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/train_1_step_004000_sample02.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/train_1_step_004000_sample03.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/train_1_step_004000_sample04.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/train_1_step_006000.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/train_1_step_006000_sample00.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/train_1_step_006000_sample01.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/train_1_step_006000_sample02.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/train_1_step_006000_sample03.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/train_1_step_006000_sample04.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/train_1_step_008000.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/train_1_step_008000_sample00.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/train_1_step_008000_sample01.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/train_1_step_008000_sample02.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/train_1_step_008000_sample03.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/train_1_step_008000_sample04.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/train_1_step_010000.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/train_1_step_010000_sample00.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/train_1_step_010000_sample01.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/train_1_step_010000_sample02.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/train_1_step_010000_sample03.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/train_1_step_010000_sample04.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/train_1_step_011999.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/train_1_step_011999_sample00.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/train_1_step_011999_sample01.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/train_1_step_011999_sample02.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/train_1_step_011999_sample03.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/train_1_step_011999_sample04.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/train_1_step_013999.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/train_1_step_013999_sample00.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/train_1_step_013999_sample01.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/train_1_step_013999_sample02.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/train_1_step_013999_sample03.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/train_1_step_013999_sample04.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/train_1_step_015999.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/train_1_step_015999_sample00.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/train_1_step_015999_sample01.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/train_1_step_015999_sample02.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/train_1_step_015999_sample03.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/train_1_step_015999_sample04.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/train_1_step_017999.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/train_1_step_017999_sample00.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/train_1_step_017999_sample01.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/train_1_step_017999_sample02.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/train_1_step_017999_sample03.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/train_1_step_017999_sample04.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/train_1_step_019999.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/train_1_step_019999_sample00.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/train_1_step_019999_sample01.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/train_1_step_019999_sample02.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/train_1_step_019999_sample03.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/train_1_step_019999_sample04.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/val_0_step_002000.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/val_0_step_002000_sample00.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/val_0_step_002000_sample01.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/val_0_step_002000_sample02.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/val_0_step_002000_sample03.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/val_0_step_002000_sample04.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/val_0_step_004000.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/val_0_step_004000_sample00.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/val_0_step_004000_sample01.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/val_0_step_004000_sample02.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/val_0_step_004000_sample03.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/val_0_step_004000_sample04.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/val_0_step_006000.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/val_0_step_006000_sample00.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/val_0_step_006000_sample01.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/val_0_step_006000_sample02.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/val_0_step_006000_sample03.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/val_0_step_006000_sample04.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/val_0_step_008000.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/val_0_step_008000_sample00.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/val_0_step_008000_sample01.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/val_0_step_008000_sample02.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/val_0_step_008000_sample03.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/val_0_step_008000_sample04.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/val_0_step_010000.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/val_0_step_010000_sample00.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/val_0_step_010000_sample01.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/val_0_step_010000_sample02.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/val_0_step_010000_sample03.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/val_0_step_010000_sample04.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/val_0_step_011999.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/val_0_step_011999_sample00.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/val_0_step_011999_sample01.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/val_0_step_011999_sample02.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/val_0_step_011999_sample03.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/val_0_step_011999_sample04.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/val_0_step_013999.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/val_0_step_013999_sample00.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/val_0_step_013999_sample01.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/val_0_step_013999_sample02.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/val_0_step_013999_sample03.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/val_0_step_013999_sample04.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/val_0_step_015999.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/val_0_step_015999_sample00.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/val_0_step_015999_sample01.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/val_0_step_015999_sample02.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/val_0_step_015999_sample03.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/val_0_step_015999_sample04.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/val_0_step_017999.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/val_0_step_017999_sample00.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/val_0_step_017999_sample01.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/val_0_step_017999_sample02.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/val_0_step_017999_sample03.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/val_0_step_017999_sample04.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/val_0_step_019999.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/val_0_step_019999_sample00.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/val_0_step_019999_sample01.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/val_0_step_019999_sample02.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/val_0_step_019999_sample03.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/val_0_step_019999_sample04.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/val_1_step_002000.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/val_1_step_002000_sample00.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/val_1_step_002000_sample01.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/val_1_step_002000_sample02.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/val_1_step_002000_sample03.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/val_1_step_002000_sample04.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/val_1_step_004000.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/val_1_step_004000_sample00.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/val_1_step_004000_sample01.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/val_1_step_004000_sample02.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/val_1_step_004000_sample03.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/val_1_step_004000_sample04.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/val_1_step_006000.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/val_1_step_006000_sample00.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/val_1_step_006000_sample01.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/val_1_step_006000_sample02.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/val_1_step_006000_sample03.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/val_1_step_006000_sample04.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/val_1_step_008000.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/val_1_step_008000_sample00.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/val_1_step_008000_sample01.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/val_1_step_008000_sample02.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/val_1_step_008000_sample03.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/val_1_step_008000_sample04.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/val_1_step_010000.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/val_1_step_010000_sample00.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/val_1_step_010000_sample01.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/val_1_step_010000_sample02.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/val_1_step_010000_sample03.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/val_1_step_010000_sample04.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/val_1_step_011999.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/val_1_step_011999_sample00.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/val_1_step_011999_sample01.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/val_1_step_011999_sample02.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/val_1_step_011999_sample03.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/val_1_step_011999_sample04.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/val_1_step_013999.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/val_1_step_013999_sample00.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/val_1_step_013999_sample01.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/val_1_step_013999_sample02.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/val_1_step_013999_sample03.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/val_1_step_013999_sample04.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/val_1_step_015999.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/val_1_step_015999_sample00.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/val_1_step_015999_sample01.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/val_1_step_015999_sample02.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/val_1_step_015999_sample03.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/val_1_step_015999_sample04.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/val_1_step_017999.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/val_1_step_017999_sample00.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/val_1_step_017999_sample01.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/val_1_step_017999_sample02.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/val_1_step_017999_sample03.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/val_1_step_017999_sample04.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/val_1_step_019999.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/val_1_step_019999_sample00.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/val_1_step_019999_sample01.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/val_1_step_019999_sample02.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/val_1_step_019999_sample03.png filter=lfs diff=lfs merge=lfs -text +output/lora_rank_128_mlp_H200/step_vis/val_1_step_019999_sample04.png filter=lfs diff=lfs merge=lfs -text diff --git a/output/lora_rank_128_mlp_H200/checkpoint_epoch_0407/control_params.pt b/output/lora_rank_128_mlp_H200/checkpoint_epoch_0407/control_params.pt new file mode 100644 index 0000000000000000000000000000000000000000..d7aa47e511d61105b60749a08ef7c5c87645e2ae --- /dev/null +++ b/output/lora_rank_128_mlp_H200/checkpoint_epoch_0407/control_params.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cbeb95216e53dccecf9dfd1e22637df765541b571d63411200ad8fc5298385f1 +size 8345739463 diff --git a/output/lora_rank_128_mlp_H200/checkpoint_epoch_0407/hdc2a.pt b/output/lora_rank_128_mlp_H200/checkpoint_epoch_0407/hdc2a.pt new file mode 100644 index 0000000000000000000000000000000000000000..7c6da39d7dd96f3add89bb3a5365299ad45c5677 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/checkpoint_epoch_0407/hdc2a.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8863847b12bb949dbd430b44f37d19840ef73765c165d97bc1a8500908f811b7 +size 29132268 diff --git a/output/lora_rank_128_mlp_H200/checkpoint_epoch_0407/meta.pt b/output/lora_rank_128_mlp_H200/checkpoint_epoch_0407/meta.pt new file mode 100644 index 0000000000000000000000000000000000000000..22c0e37ade3152962a134a3b01f7e74ba86d0d7b --- /dev/null +++ b/output/lora_rank_128_mlp_H200/checkpoint_epoch_0407/meta.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:06f235563ad4b2df0cda66e47424357865dd73bbad015b79f83035413f227a35 +size 2907 diff --git a/output/lora_rank_128_mlp_H200/checkpoint_epoch_0407/optimizer.pt b/output/lora_rank_128_mlp_H200/checkpoint_epoch_0407/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..db1e6eab0303323584ae570ad6b8a31f1fe7ee10 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/checkpoint_epoch_0407/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a08379b4d6eed2ce913561dbb41667ab3b4fed4ba1f299f135d94c5329cc151a +size 215603979 diff --git a/output/lora_rank_128_mlp_H200/checkpoint_epoch_0407/scheduler.pt b/output/lora_rank_128_mlp_H200/checkpoint_epoch_0407/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..ba3eb78ba6f7d3498c4e5a411a8463805e66a126 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/checkpoint_epoch_0407/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0ba3f383239f27427b3d987bda245321a9767d0c10929dfe13a2d300dc1f97fd +size 1657 diff --git a/output/lora_rank_128_mlp_H200/checkpoint_epoch_0497/control_params.pt b/output/lora_rank_128_mlp_H200/checkpoint_epoch_0497/control_params.pt new file mode 100644 index 0000000000000000000000000000000000000000..1384252d21ad0dce884c7138306da124f740184e --- /dev/null +++ b/output/lora_rank_128_mlp_H200/checkpoint_epoch_0497/control_params.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:04dac662e4e125b52ab72c88b3ae3da78da16979be49899c69be8a00092214a5 +size 8345739463 diff --git a/output/lora_rank_128_mlp_H200/checkpoint_epoch_0497/hdc2a.pt b/output/lora_rank_128_mlp_H200/checkpoint_epoch_0497/hdc2a.pt new file mode 100644 index 0000000000000000000000000000000000000000..324d31337ea23951ed8d37a441758dac65e7475c --- /dev/null +++ b/output/lora_rank_128_mlp_H200/checkpoint_epoch_0497/hdc2a.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:552829841284bf42d002647549e44c43333a20c40f5f66730a20cfb2639e8249 +size 29132268 diff --git a/output/lora_rank_128_mlp_H200/checkpoint_epoch_0497/meta.pt b/output/lora_rank_128_mlp_H200/checkpoint_epoch_0497/meta.pt new file mode 100644 index 0000000000000000000000000000000000000000..2116074b465bb62d3e401c8da53d177e3c567e48 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/checkpoint_epoch_0497/meta.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:766197e29b4ec58b2a9b09bacb947d45eb5dbf5339194000e04c078358b7daf4 +size 2907 diff --git a/output/lora_rank_128_mlp_H200/checkpoint_epoch_0497/optimizer.pt b/output/lora_rank_128_mlp_H200/checkpoint_epoch_0497/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..f291cb71036fa026f999d76d110a433e67e1747c --- /dev/null +++ b/output/lora_rank_128_mlp_H200/checkpoint_epoch_0497/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5811ec88830fa6e3ce9883267d078c1423ab5f45dad11fc21f55092a43b6317c +size 215603979 diff --git a/output/lora_rank_128_mlp_H200/checkpoint_epoch_0497/scheduler.pt b/output/lora_rank_128_mlp_H200/checkpoint_epoch_0497/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..5f3113a77da721dba74a1a1756ce994a944355b9 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/checkpoint_epoch_0497/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2b3423fcfdadc42bb08557046edebbc2badf9d8284ae8ef0448b2b5b7d41f87a +size 1657 diff --git a/output/lora_rank_128_mlp_H200/checkpoint_epoch_0498/control_params.pt b/output/lora_rank_128_mlp_H200/checkpoint_epoch_0498/control_params.pt new file mode 100644 index 0000000000000000000000000000000000000000..91c6a9839be207dd223b68e1ba047c30a63d7ece --- /dev/null +++ b/output/lora_rank_128_mlp_H200/checkpoint_epoch_0498/control_params.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:26187c99319d7bfc0c13f90c9809059f7e0d8774d4cd074233c558c0ac74a98d +size 8345739463 diff --git a/output/lora_rank_128_mlp_H200/checkpoint_epoch_0498/hdc2a.pt b/output/lora_rank_128_mlp_H200/checkpoint_epoch_0498/hdc2a.pt new file mode 100644 index 0000000000000000000000000000000000000000..1589a71ba3a88ade146c6a374588d9502ec4b87f --- /dev/null +++ b/output/lora_rank_128_mlp_H200/checkpoint_epoch_0498/hdc2a.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d901f4cbb1adbd890b8a58a3f77893fbb9cbebe82286be07ba250034ea84426a +size 29132268 diff --git a/output/lora_rank_128_mlp_H200/checkpoint_epoch_0498/meta.pt b/output/lora_rank_128_mlp_H200/checkpoint_epoch_0498/meta.pt new file mode 100644 index 0000000000000000000000000000000000000000..abca13bfdf0a83d23e2463ee318d040ed2214271 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/checkpoint_epoch_0498/meta.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:64ce6b42dc6ea06c2be3e53a68613ce7039e54ac6566745ef1dc446f38ca60b9 +size 2907 diff --git a/output/lora_rank_128_mlp_H200/checkpoint_epoch_0498/optimizer.pt b/output/lora_rank_128_mlp_H200/checkpoint_epoch_0498/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..a1a70870f415c9a48eac06c835f29e2a6068e4c6 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/checkpoint_epoch_0498/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:01d751d8f4e4cc1c987df0c06182be581416e741e55cacb18af2bfbd04904441 +size 215603979 diff --git a/output/lora_rank_128_mlp_H200/checkpoint_epoch_0498/scheduler.pt b/output/lora_rank_128_mlp_H200/checkpoint_epoch_0498/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..0c6a036639b066844978cf9bc939da4eadc7dcff --- /dev/null +++ b/output/lora_rank_128_mlp_H200/checkpoint_epoch_0498/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d8d0c459ad9b24428ee2ea1c6d589a88620ae09213bd4a4359d50692216add2f +size 1657 diff --git a/output/lora_rank_128_mlp_H200/checkpoint_epoch_0499/control_params.pt b/output/lora_rank_128_mlp_H200/checkpoint_epoch_0499/control_params.pt new file mode 100644 index 0000000000000000000000000000000000000000..dd0a484f4cc8af5bf8bc3b9dd0aeb71f910cae05 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/checkpoint_epoch_0499/control_params.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:05e5e68f2f0c194a7930009c71ece4de4aba224767f1f1aecae2bbbbae06f38c +size 8345739463 diff --git a/output/lora_rank_128_mlp_H200/checkpoint_epoch_0499/hdc2a.pt b/output/lora_rank_128_mlp_H200/checkpoint_epoch_0499/hdc2a.pt new file mode 100644 index 0000000000000000000000000000000000000000..e51afba7319ed7a3f90c6dfabe32cc215819b7e2 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/checkpoint_epoch_0499/hdc2a.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bfc3e999788d79644f0e148396ff4c22eee864bff59f820f7e88456ccd7f8919 +size 29132268 diff --git a/output/lora_rank_128_mlp_H200/checkpoint_epoch_0499/meta.pt b/output/lora_rank_128_mlp_H200/checkpoint_epoch_0499/meta.pt new file mode 100644 index 0000000000000000000000000000000000000000..999a892885938d8d338f8c24ac5feb9374665e50 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/checkpoint_epoch_0499/meta.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bd912dc9ae7ca6889dd47736f399922a2739439e451598ffc77691e0254ea7f2 +size 2907 diff --git a/output/lora_rank_128_mlp_H200/checkpoint_epoch_0499/optimizer.pt b/output/lora_rank_128_mlp_H200/checkpoint_epoch_0499/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..89847dedf63d917ae324a1c98b07bb04b6a11df5 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/checkpoint_epoch_0499/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:28e63e3bded040fbc01673a7875f96d141f97807d40dd2bdeb90a7158832c068 +size 215603979 diff --git a/output/lora_rank_128_mlp_H200/checkpoint_epoch_0499/scheduler.pt b/output/lora_rank_128_mlp_H200/checkpoint_epoch_0499/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..b94956639031ddb8dc637364178a611fa19e3d4e --- /dev/null +++ b/output/lora_rank_128_mlp_H200/checkpoint_epoch_0499/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5fa110b80f4bcd0a492a42f67b6c76397a51f47abb71f9ab7e839ded059406c1 +size 1657 diff --git a/output/lora_rank_128_mlp_H200/inference_samples/JAX_Tile_018_007_4_seed0_steps28.png b/output/lora_rank_128_mlp_H200/inference_samples/JAX_Tile_018_007_4_seed0_steps28.png new file mode 100644 index 0000000000000000000000000000000000000000..d9a8d313d9d9018ab5194ba4e6a2ee7ff1d901e5 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/inference_samples/JAX_Tile_018_007_4_seed0_steps28.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b6b71638367efcb8e66581620b4bd32871ecd7199a8e9f68451b11cc084f305a +size 1888416 diff --git a/output/lora_rank_128_mlp_H200/milestone_vis/milestone_grid_test_0.png b/output/lora_rank_128_mlp_H200/milestone_vis/milestone_grid_test_0.png new file mode 100644 index 0000000000000000000000000000000000000000..32a6f44956ce2f1dc01e1f3d01fb1c05b7d23cc4 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/milestone_vis/milestone_grid_test_0.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:efe5f0f58b72154973eb37df3153d79abd0c4ee4596ecef41233980d484c89ac +size 9068957 diff --git a/output/lora_rank_128_mlp_H200/milestone_vis/milestone_grid_test_1.png b/output/lora_rank_128_mlp_H200/milestone_vis/milestone_grid_test_1.png new file mode 100644 index 0000000000000000000000000000000000000000..598e249bb6fd973ebbe53df0c60525c96f8ade66 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/milestone_vis/milestone_grid_test_1.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:474135150df20a51b50e4b411234a2fe69239d9c1e9bbbb85c7d0a37c460634b +size 9023857 diff --git a/output/lora_rank_128_mlp_H200/milestone_vis/milestone_grid_test_2.png b/output/lora_rank_128_mlp_H200/milestone_vis/milestone_grid_test_2.png new file mode 100644 index 0000000000000000000000000000000000000000..cec2fa1af8b26b1c8ad5a31a625586657fde155a --- /dev/null +++ b/output/lora_rank_128_mlp_H200/milestone_vis/milestone_grid_test_2.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7235ed52321b929ee36f68bdfda1eb8287e03c8e72ca7b035333612c55dd171f +size 9235977 diff --git a/output/lora_rank_128_mlp_H200/milestone_vis/milestone_grid_test_3.png b/output/lora_rank_128_mlp_H200/milestone_vis/milestone_grid_test_3.png new file mode 100644 index 0000000000000000000000000000000000000000..bd516934a3ec5202fb3286072177a697f8615c59 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/milestone_vis/milestone_grid_test_3.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:260a122d40e95499fe43886770b859915e9bccfd8d4abe75cc59e2fdd0eb9999 +size 9420225 diff --git a/output/lora_rank_128_mlp_H200/milestone_vis/milestone_grid_test_4.png b/output/lora_rank_128_mlp_H200/milestone_vis/milestone_grid_test_4.png new file mode 100644 index 0000000000000000000000000000000000000000..8fbefb104379db0c8dd3a469530bf91160a8a0df --- /dev/null +++ b/output/lora_rank_128_mlp_H200/milestone_vis/milestone_grid_test_4.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d91435b6b1ee37fe0ac7299ecd9ea5ea68d66e1b5819858ebeab282e25f90a5f +size 9427480 diff --git a/output/lora_rank_128_mlp_H200/milestone_vis/milestone_grid_test_5.png b/output/lora_rank_128_mlp_H200/milestone_vis/milestone_grid_test_5.png new file mode 100644 index 0000000000000000000000000000000000000000..2fc57b27f6712acf0274178d63b65b721a48d490 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/milestone_vis/milestone_grid_test_5.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8ade4faa457122e4365eb38fa9fd2f6245165da9ff0e8df9fb8542bfef804989 +size 9596215 diff --git a/output/lora_rank_128_mlp_H200/milestone_vis/milestone_grid_train_0.png b/output/lora_rank_128_mlp_H200/milestone_vis/milestone_grid_train_0.png new file mode 100644 index 0000000000000000000000000000000000000000..d648a8b3fd246b0146aa4b8633563254c5c77fe3 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/milestone_vis/milestone_grid_train_0.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3c14fc0107c85f7c347fdaf7169862effbd2147efd35651a23feb35a412d609c +size 9212903 diff --git a/output/lora_rank_128_mlp_H200/milestone_vis/milestone_grid_train_1.png b/output/lora_rank_128_mlp_H200/milestone_vis/milestone_grid_train_1.png new file mode 100644 index 0000000000000000000000000000000000000000..e3bb6c28de3e28401a46e15df35aae3cbcc7ba56 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/milestone_vis/milestone_grid_train_1.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b368c0fa6b9786290a6ba5c04b6a3d64e78f613cfa2f7a5a092a4b3f81117153 +size 9486332 diff --git a/output/lora_rank_128_mlp_H200/milestone_vis/milestone_grid_val_0.png b/output/lora_rank_128_mlp_H200/milestone_vis/milestone_grid_val_0.png new file mode 100644 index 0000000000000000000000000000000000000000..f050cd849765a1184da79cd98ae03228761929d2 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/milestone_vis/milestone_grid_val_0.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6db3a5410e437d5851b66f767deb42b0c324edd3501515adfea942f2d66f58e6 +size 8874960 diff --git a/output/lora_rank_128_mlp_H200/milestone_vis/milestone_grid_val_1.png b/output/lora_rank_128_mlp_H200/milestone_vis/milestone_grid_val_1.png new file mode 100644 index 0000000000000000000000000000000000000000..9543559847c1cbc299cc12068ba8d30bcc6a736d --- /dev/null +++ b/output/lora_rank_128_mlp_H200/milestone_vis/milestone_grid_val_1.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:443735465701cc7e7c1a11ad3ad54c4a8a760a776601c8207e9ecabd781503fa +size 9047478 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_0_step_000000.png b/output/lora_rank_128_mlp_H200/step_vis/test_0_step_000000.png new file mode 100644 index 0000000000000000000000000000000000000000..e56bf6e52c647fa9042a2bee298d252c7f84e537 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_0_step_000000.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ef8275309cedddab391cd39069d09031db449c7ed97db9cb8db3bd948accd53c +size 2018465 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_0_step_000000_sample00.png b/output/lora_rank_128_mlp_H200/step_vis/test_0_step_000000_sample00.png new file mode 100644 index 0000000000000000000000000000000000000000..1599f4466517958731495d39109efd5d41e9e251 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_0_step_000000_sample00.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e7d372ef157c4cf1fc45cd3cd368242e924986830030c9813c7079af9725f3d0 +size 2739997 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_0_step_000000_sample01.png b/output/lora_rank_128_mlp_H200/step_vis/test_0_step_000000_sample01.png new file mode 100644 index 0000000000000000000000000000000000000000..7c2d5de429278a9098c642f3869845b7b3f8c931 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_0_step_000000_sample01.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bed4fda278c1b20aeb516dba25d2224a4a5324f4d7bff34031dc2e5c91ba18cc +size 2762463 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_0_step_000000_sample02.png b/output/lora_rank_128_mlp_H200/step_vis/test_0_step_000000_sample02.png new file mode 100644 index 0000000000000000000000000000000000000000..4cb62cb368bdf71ab5a28fa112d536be3e188684 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_0_step_000000_sample02.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4abedefd4c596449c12a12a5c93b2d39a04fcc5f89aacde3c572ffd6d98e7c9d +size 2760721 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_0_step_000000_sample03.png b/output/lora_rank_128_mlp_H200/step_vis/test_0_step_000000_sample03.png new file mode 100644 index 0000000000000000000000000000000000000000..55a2f8441020ac39526867b8bd8b6b20c9d46d71 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_0_step_000000_sample03.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7c66ea3ad651070568697e044311cbdd087d2d241e91a5ec4a5839076448f4a0 +size 2728045 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_0_step_000000_sample04.png b/output/lora_rank_128_mlp_H200/step_vis/test_0_step_000000_sample04.png new file mode 100644 index 0000000000000000000000000000000000000000..f2393c5a6a09158e24e930d34309eee485e3d8c1 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_0_step_000000_sample04.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fbe4ab9d86a96232e73a2c07c5448170d476d3d73d6a95c99cb1d47d82fe70ef +size 2710024 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_0_step_002000.png b/output/lora_rank_128_mlp_H200/step_vis/test_0_step_002000.png new file mode 100644 index 0000000000000000000000000000000000000000..980cef1eb07e89e9413dca0be583b5eaf030f877 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_0_step_002000.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5b0b2fd7df978f18160a7395a4d9ab62e587b931b8ec9f7a87d3a62804344675 +size 1785530 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_0_step_002000_sample00.png b/output/lora_rank_128_mlp_H200/step_vis/test_0_step_002000_sample00.png new file mode 100644 index 0000000000000000000000000000000000000000..f5033f3fbe8cdf93c59f4520c1314826a13e9402 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_0_step_002000_sample00.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:30c4ccb7e15421a47c1618bb0d3842f30f8f5dfa419d02fec1b0074f6df2067a +size 1892412 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_0_step_002000_sample01.png b/output/lora_rank_128_mlp_H200/step_vis/test_0_step_002000_sample01.png new file mode 100644 index 0000000000000000000000000000000000000000..aec79f1e59985329957b33be011489556c1587df --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_0_step_002000_sample01.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7ed76d17bcdd1ce2137d6491621c9764be4d0f03f9ac5d49f856f96e90a487ce +size 1736279 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_0_step_002000_sample02.png b/output/lora_rank_128_mlp_H200/step_vis/test_0_step_002000_sample02.png new file mode 100644 index 0000000000000000000000000000000000000000..df2cc6be4304ae7da0021e27e509a3ee2a404de4 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_0_step_002000_sample02.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d93ee77484eda1c52e18dabe24ab829f0237489e81d761277e328897022f290f +size 1858950 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_0_step_002000_sample03.png b/output/lora_rank_128_mlp_H200/step_vis/test_0_step_002000_sample03.png new file mode 100644 index 0000000000000000000000000000000000000000..2fa6c10581848012658f39f8d0a8948aa5c23d04 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_0_step_002000_sample03.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4c72ae14b8ca12a1674f5df9ceb6b7385aeb0fe576c07179948fbae1cebbe216 +size 1707600 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_0_step_002000_sample04.png b/output/lora_rank_128_mlp_H200/step_vis/test_0_step_002000_sample04.png new file mode 100644 index 0000000000000000000000000000000000000000..bf6352beb93b4c4bfb786dec4c236eb9987ce749 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_0_step_002000_sample04.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:07b37c06774438b5551250e205174ba53b853fbca5ce6287bad8cb1814d11ad2 +size 2046979 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_0_step_004000.png b/output/lora_rank_128_mlp_H200/step_vis/test_0_step_004000.png new file mode 100644 index 0000000000000000000000000000000000000000..62247b1ad9c56258d31a348879844ee58372b69b --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_0_step_004000.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d3ba542157ac275765f471195d11ff9b9733ca6a40acc9a75e487b6cf2547b55 +size 1788602 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_0_step_004000_sample00.png b/output/lora_rank_128_mlp_H200/step_vis/test_0_step_004000_sample00.png new file mode 100644 index 0000000000000000000000000000000000000000..2a65891149da07977a8f37fa77ebd77c4d85412e --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_0_step_004000_sample00.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4ec7f860fb8620a9f990ef1f211f35c27d28a997cfb313a1f473cc3d3e010be3 +size 1820543 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_0_step_004000_sample01.png b/output/lora_rank_128_mlp_H200/step_vis/test_0_step_004000_sample01.png new file mode 100644 index 0000000000000000000000000000000000000000..3da0f4acab6a8f13d275ead58fbfa4a391c5a856 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_0_step_004000_sample01.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:202e0909dc0b226df7ee475cceaa3efcfbc18a80cc6c23b16c85e30f6bfad5f8 +size 1690996 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_0_step_004000_sample02.png b/output/lora_rank_128_mlp_H200/step_vis/test_0_step_004000_sample02.png new file mode 100644 index 0000000000000000000000000000000000000000..6e1ffe79e3f3c21d8de1c773516334c0eaeb38b1 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_0_step_004000_sample02.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:19a8f837b850c928ca29cbfa5d6ac33846aea75db3fb71aebd6ac428cda1603b +size 1751572 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_0_step_004000_sample03.png b/output/lora_rank_128_mlp_H200/step_vis/test_0_step_004000_sample03.png new file mode 100644 index 0000000000000000000000000000000000000000..167e81c0ff3a99b56baa89c5a2e454a929f274d1 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_0_step_004000_sample03.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4b869e4f4e5a556b5c6dd401dc5a6955de3482099656b156a16f351929a258dd +size 1831952 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_0_step_004000_sample04.png b/output/lora_rank_128_mlp_H200/step_vis/test_0_step_004000_sample04.png new file mode 100644 index 0000000000000000000000000000000000000000..394458a0db5c671fb7d0daf93e584dce3d6faa7a --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_0_step_004000_sample04.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:62a4b92eee37bf66f17dccc3da3bfc78440615dab5fa07450bfce5239d4ee343 +size 1889645 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_0_step_006000.png b/output/lora_rank_128_mlp_H200/step_vis/test_0_step_006000.png new file mode 100644 index 0000000000000000000000000000000000000000..2f902e3d04c23031f4c0a0ade539c4cc94d00e1d --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_0_step_006000.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b1ea801cb284ad29eb884c044eb37c6d7b2772d1ab2a0ccbfaae1b7f53767beb +size 1791302 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_0_step_006000_sample00.png b/output/lora_rank_128_mlp_H200/step_vis/test_0_step_006000_sample00.png new file mode 100644 index 0000000000000000000000000000000000000000..2258fa869f0c1e34d10528cf380fe3e45f164ca0 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_0_step_006000_sample00.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d3fed78ef3b9e9aa60f4198aa5a728646dc9f1273acd5cca8f606da11610ed0b +size 1801233 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_0_step_006000_sample01.png b/output/lora_rank_128_mlp_H200/step_vis/test_0_step_006000_sample01.png new file mode 100644 index 0000000000000000000000000000000000000000..3fb5917aeb36e2dd1e5db437b2fa7d159cee5cc4 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_0_step_006000_sample01.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2c426961b1b1efb924044796adb85539e173db76f1246017f027661f41dafc26 +size 1677361 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_0_step_006000_sample02.png b/output/lora_rank_128_mlp_H200/step_vis/test_0_step_006000_sample02.png new file mode 100644 index 0000000000000000000000000000000000000000..0c63d5cad62d5d3553a8b49196d5bb7201ebafe6 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_0_step_006000_sample02.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9463c384569f9f79e811d59dcea464788395621e3588c6939bb2eb56ec875c0c +size 1758083 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_0_step_006000_sample03.png b/output/lora_rank_128_mlp_H200/step_vis/test_0_step_006000_sample03.png new file mode 100644 index 0000000000000000000000000000000000000000..cdbe782effcc2742ddcfea3ecffcdedcb1453c30 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_0_step_006000_sample03.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f950339cc2d4ea7a711383bb4870fd9467e1c58e03ad7fb4ec94eabca92202a9 +size 1882320 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_0_step_006000_sample04.png b/output/lora_rank_128_mlp_H200/step_vis/test_0_step_006000_sample04.png new file mode 100644 index 0000000000000000000000000000000000000000..2be68e4e8a9d31eb979f7023e0e9b8ab0cbe712b --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_0_step_006000_sample04.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:25ae4ae24a8d7a245bc44bec8e9c4b8a413ed2bcfac50ecd36f616e9fedd8318 +size 1950315 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_0_step_008000.png b/output/lora_rank_128_mlp_H200/step_vis/test_0_step_008000.png new file mode 100644 index 0000000000000000000000000000000000000000..f72d501c27c80b2017e13747bcc5e3a5f2aca0b8 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_0_step_008000.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5236edd32eca89537fa16fdb0f58c12395f693b862569ae68b6aaba8f54bfe91 +size 1800223 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_0_step_008000_sample00.png b/output/lora_rank_128_mlp_H200/step_vis/test_0_step_008000_sample00.png new file mode 100644 index 0000000000000000000000000000000000000000..6926132bab78c7a6dbfcda79c4f79434170e7588 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_0_step_008000_sample00.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2ca0574a34381f036668e8927131e81b7960b81e3f2895d1d7d519295c2a3792 +size 1731660 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_0_step_008000_sample01.png b/output/lora_rank_128_mlp_H200/step_vis/test_0_step_008000_sample01.png new file mode 100644 index 0000000000000000000000000000000000000000..33986c4692973acdfc92774551c2411294b414f4 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_0_step_008000_sample01.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7daf13f61d87860e9869565611930c9ea9122bc61677f569b57de996c448a4ce +size 1695728 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_0_step_008000_sample02.png b/output/lora_rank_128_mlp_H200/step_vis/test_0_step_008000_sample02.png new file mode 100644 index 0000000000000000000000000000000000000000..2ba7095dca3ed4acfae439ee870864927d2b203b --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_0_step_008000_sample02.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a423b0fbd1e6a5b62b187096d4faf380a53a6a255028a36f7cfd61258da3c845 +size 1749341 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_0_step_008000_sample03.png b/output/lora_rank_128_mlp_H200/step_vis/test_0_step_008000_sample03.png new file mode 100644 index 0000000000000000000000000000000000000000..bcecec8cc334d4014d803ec8ea27e4a789b75642 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_0_step_008000_sample03.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a36a663bdc53f0b973bebf1acc48bf1507d100bbacdf0abb3c3f78aa1b5fefd9 +size 1896111 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_0_step_008000_sample04.png b/output/lora_rank_128_mlp_H200/step_vis/test_0_step_008000_sample04.png new file mode 100644 index 0000000000000000000000000000000000000000..e0cc7dca7255fe4ed9a6ce86b00b8d9889374316 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_0_step_008000_sample04.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be974273931e7d93ecb322d9cde1ea9b28bf3e1a2ee75b8a1c6667e33138105e +size 1977932 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_0_step_010000.png b/output/lora_rank_128_mlp_H200/step_vis/test_0_step_010000.png new file mode 100644 index 0000000000000000000000000000000000000000..b9f73d2a85a0c8af431c278ece704c5d3e25ff56 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_0_step_010000.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8f77b4558506e359e8401f073a56fdc910be24cfb77242976cbbfde458e1351f +size 1779128 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_0_step_010000_sample00.png b/output/lora_rank_128_mlp_H200/step_vis/test_0_step_010000_sample00.png new file mode 100644 index 0000000000000000000000000000000000000000..66ca140e31ab80d628cafe66b6a31c25db4c04ed --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_0_step_010000_sample00.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:61018c8a5d171293c398bb94eb5c682b1b3f69b1937356eba58254324c52259d +size 1695525 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_0_step_010000_sample01.png b/output/lora_rank_128_mlp_H200/step_vis/test_0_step_010000_sample01.png new file mode 100644 index 0000000000000000000000000000000000000000..0005dcd91d5dd3f37aa97c9a8400d106663c2b77 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_0_step_010000_sample01.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a337be822d3b32853d16aa7417cb027330fa6c6d13b6f496808568457b6494a9 +size 1642647 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_0_step_010000_sample02.png b/output/lora_rank_128_mlp_H200/step_vis/test_0_step_010000_sample02.png new file mode 100644 index 0000000000000000000000000000000000000000..32c0dbcbf87cb3a4b349a9faa5368ff5f220fd52 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_0_step_010000_sample02.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c9512fd6167537d7d1d0c3d2878c5dcad18694434df0b4ed386c6bb0f336a8b2 +size 1794886 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_0_step_010000_sample03.png b/output/lora_rank_128_mlp_H200/step_vis/test_0_step_010000_sample03.png new file mode 100644 index 0000000000000000000000000000000000000000..88427f32ad4c820d679feb528f3ac2f191613098 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_0_step_010000_sample03.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8715bba8604dfbedffb71274ddbd63bafd0754d3fd1f1b6af9c9fa07f31cbe52 +size 1747274 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_0_step_010000_sample04.png b/output/lora_rank_128_mlp_H200/step_vis/test_0_step_010000_sample04.png new file mode 100644 index 0000000000000000000000000000000000000000..dad1d493bd2916a519186b56177c097b332f859e --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_0_step_010000_sample04.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b9769860a4a96c63f1145030d6913ef09b18489824fc781b2853c7796b6607e9 +size 1894916 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_0_step_011999.png b/output/lora_rank_128_mlp_H200/step_vis/test_0_step_011999.png new file mode 100644 index 0000000000000000000000000000000000000000..dad019772117074c3bd931e141fd339f5d5b9d10 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_0_step_011999.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:74d6bbbe942ffb1d4cd5f94a0a6ceeb7c3d4eab882450b91c1c04c7f0e9cb383 +size 1774313 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_0_step_011999_sample00.png b/output/lora_rank_128_mlp_H200/step_vis/test_0_step_011999_sample00.png new file mode 100644 index 0000000000000000000000000000000000000000..f473c8940a65945cc8beea707cb95336b07afe98 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_0_step_011999_sample00.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:92b03a9f565bf669cb2a4cd527c126c16429fbe9474c726666c55023dc3205ac +size 1684337 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_0_step_011999_sample01.png b/output/lora_rank_128_mlp_H200/step_vis/test_0_step_011999_sample01.png new file mode 100644 index 0000000000000000000000000000000000000000..6ed2dfdc5eace3b956563ee843a280181515c6e6 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_0_step_011999_sample01.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aeefcec490f8513c0f49ba6db76931a4e19a44f50b8b3fe66f08a0686e6e615b +size 1735512 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_0_step_011999_sample02.png b/output/lora_rank_128_mlp_H200/step_vis/test_0_step_011999_sample02.png new file mode 100644 index 0000000000000000000000000000000000000000..60aa2283088c15c5021f8fd1ca76becdefe77aa6 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_0_step_011999_sample02.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bf3646a036827eff917e2b3c1cadb0160b112de32430281e302170e2c2c9745f +size 1670422 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_0_step_011999_sample03.png b/output/lora_rank_128_mlp_H200/step_vis/test_0_step_011999_sample03.png new file mode 100644 index 0000000000000000000000000000000000000000..5a84178156e94802faea3a5f8377e8626349c5c0 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_0_step_011999_sample03.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ba6a748cdcd66d5a716eca53a870568ef149598bf3d9ee1448382aa42d8214f2 +size 1773348 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_0_step_011999_sample04.png b/output/lora_rank_128_mlp_H200/step_vis/test_0_step_011999_sample04.png new file mode 100644 index 0000000000000000000000000000000000000000..de877ceeae5550e9a393d075460f0d97999ea5c4 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_0_step_011999_sample04.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3fa286096de8cd0f71f76edab84039f195829ab98cf2aa188c4e6189e98d48e5 +size 1939329 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_0_step_013999.png b/output/lora_rank_128_mlp_H200/step_vis/test_0_step_013999.png new file mode 100644 index 0000000000000000000000000000000000000000..b0f671abd23f4adc902c7eef6ccab13f7d073a32 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_0_step_013999.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:907e814903b3f1d42406af7443e434c8b90948a6e1916e10f347e4bb941f0323 +size 1782990 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_0_step_013999_sample00.png b/output/lora_rank_128_mlp_H200/step_vis/test_0_step_013999_sample00.png new file mode 100644 index 0000000000000000000000000000000000000000..00d33a64f70401b652e095dad92ef031d26443dd --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_0_step_013999_sample00.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6599faa610fe843867ba13bf82ede0591c75cd7fadd8180f696a4c58dc6a9ef0 +size 1664388 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_0_step_013999_sample01.png b/output/lora_rank_128_mlp_H200/step_vis/test_0_step_013999_sample01.png new file mode 100644 index 0000000000000000000000000000000000000000..c940fc16ba84ce3ac662e0a5d236bae1b7a7e9f5 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_0_step_013999_sample01.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9ab89dcb2da4fb184c0fe5a1e70b8044e760e33e8baecd7719258e3abfadad50 +size 1639536 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_0_step_013999_sample02.png b/output/lora_rank_128_mlp_H200/step_vis/test_0_step_013999_sample02.png new file mode 100644 index 0000000000000000000000000000000000000000..fd646ef1ef7a5eaa3b356ae1a6a250984be37e85 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_0_step_013999_sample02.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9427be59209f19ebede91e06a68211a728f439e86939fc732e08e5406ef5fa95 +size 1807988 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_0_step_013999_sample03.png b/output/lora_rank_128_mlp_H200/step_vis/test_0_step_013999_sample03.png new file mode 100644 index 0000000000000000000000000000000000000000..8ee5e31b03cdaa69bd3dc7abd8f63e761ea22f45 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_0_step_013999_sample03.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ba5f4999cc2b59b1f92042a1c2dff8b9c3c5bf889fc45ac65bfc52651e0c3516 +size 1908682 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_0_step_013999_sample04.png b/output/lora_rank_128_mlp_H200/step_vis/test_0_step_013999_sample04.png new file mode 100644 index 0000000000000000000000000000000000000000..1a2abbb0fa00360c11d7ec12800990f766da7aae --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_0_step_013999_sample04.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a9d1ac3445209a2e5aa733729768f4560422d92dcc64ecfe74815b994cebbce5 +size 1953375 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_0_step_015999.png b/output/lora_rank_128_mlp_H200/step_vis/test_0_step_015999.png new file mode 100644 index 0000000000000000000000000000000000000000..3f3c51ef61d3595307c37d1335768ae37a0b433c --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_0_step_015999.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5877473521102f85ebde971c73185764ab630e770e53edcd509b440f9ff9c5ef +size 1776732 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_0_step_015999_sample00.png b/output/lora_rank_128_mlp_H200/step_vis/test_0_step_015999_sample00.png new file mode 100644 index 0000000000000000000000000000000000000000..e03496e6cb8f495ccd001c417fde85830015db68 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_0_step_015999_sample00.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b051967406ff71d1aba683c971baf68e7b8152b4a244a4e5619b1b9413087002 +size 1598447 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_0_step_015999_sample01.png b/output/lora_rank_128_mlp_H200/step_vis/test_0_step_015999_sample01.png new file mode 100644 index 0000000000000000000000000000000000000000..ea39e966d1c670c31b1b17029584968936c795e5 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_0_step_015999_sample01.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:602c2ea49e6530401ad572c81acbeaf35acbe67c38c1343598a07cf208375bc6 +size 1667545 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_0_step_015999_sample02.png b/output/lora_rank_128_mlp_H200/step_vis/test_0_step_015999_sample02.png new file mode 100644 index 0000000000000000000000000000000000000000..a0d0db57dabe8ae372d073366bd2cac1164ae744 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_0_step_015999_sample02.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5679c54f8f4cb3208d9ee7b84c7121133babdaa19284eb7659d763f44b607ccf +size 1657603 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_0_step_015999_sample03.png b/output/lora_rank_128_mlp_H200/step_vis/test_0_step_015999_sample03.png new file mode 100644 index 0000000000000000000000000000000000000000..64deaf072632fc6cb985bec4487a841873214e2b --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_0_step_015999_sample03.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6facda30ac031eb1586726eca8d0d07865b2eff6724d11cc54e7793485ae1a14 +size 1863245 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_0_step_015999_sample04.png b/output/lora_rank_128_mlp_H200/step_vis/test_0_step_015999_sample04.png new file mode 100644 index 0000000000000000000000000000000000000000..1dc8f3967b716745c583090058f686dae2e19453 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_0_step_015999_sample04.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a61e2c32e4e422981b2b6311b2e85d32cd7ae4c3e58e1ffbfdce5b4d71c2f37f +size 1844555 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_0_step_017999.png b/output/lora_rank_128_mlp_H200/step_vis/test_0_step_017999.png new file mode 100644 index 0000000000000000000000000000000000000000..fb757919a86cd139c3b3887a76761bca116406b2 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_0_step_017999.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d61f04c136c80ed685bd40005a4f21f7e5d761dbbb688b3e6c379b4553d02a4f +size 1777863 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_0_step_017999_sample00.png b/output/lora_rank_128_mlp_H200/step_vis/test_0_step_017999_sample00.png new file mode 100644 index 0000000000000000000000000000000000000000..5d0e655ff27be894bcb3b3a369ac0f44e8c36aea --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_0_step_017999_sample00.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6501b0ce8fd219f76ae498656e7d68927fe90a531d041f0eab18dc6b305905b7 +size 1526352 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_0_step_017999_sample01.png b/output/lora_rank_128_mlp_H200/step_vis/test_0_step_017999_sample01.png new file mode 100644 index 0000000000000000000000000000000000000000..acff59d49303c66a7ad4fdd5d4d89a3705824d29 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_0_step_017999_sample01.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b15d8bc59788629bebb6161003c41b819d2c1733ede939c631260244432d98b2 +size 1683480 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_0_step_017999_sample02.png b/output/lora_rank_128_mlp_H200/step_vis/test_0_step_017999_sample02.png new file mode 100644 index 0000000000000000000000000000000000000000..b4ec4c5008f44a03b79610b3eff2b6e96ef40dfd --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_0_step_017999_sample02.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ba092b4c224c6f5edcd52f40c6903067c7671995a11955c24316119637874edb +size 1730194 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_0_step_017999_sample03.png b/output/lora_rank_128_mlp_H200/step_vis/test_0_step_017999_sample03.png new file mode 100644 index 0000000000000000000000000000000000000000..04988a1c06746c991de25d82b4da207c074d91a8 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_0_step_017999_sample03.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:82e4e58de9ea9f77ee9cd530727748864b5823baf61b7bdf893feb2a0ee7890a +size 1762705 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_0_step_017999_sample04.png b/output/lora_rank_128_mlp_H200/step_vis/test_0_step_017999_sample04.png new file mode 100644 index 0000000000000000000000000000000000000000..dfc843e059617fba71b4d186d1c3e7b6835cbc3d --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_0_step_017999_sample04.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:371db6aa5b2f23d36e7730912b87e0db1360f1b9af3c5b2ff459e3c5506bd004 +size 1910342 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_0_step_019999.png b/output/lora_rank_128_mlp_H200/step_vis/test_0_step_019999.png new file mode 100644 index 0000000000000000000000000000000000000000..70f3e15d4cae0e2a273e85e2c5b3143a1c7557ba --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_0_step_019999.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:00196e1f45979669a1fae3be18885ee6e04bb14888e76fad68fe9fef7d25a1f3 +size 1776628 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_0_step_019999_sample00.png b/output/lora_rank_128_mlp_H200/step_vis/test_0_step_019999_sample00.png new file mode 100644 index 0000000000000000000000000000000000000000..0985b3fe0703b35d30009727f9f50c10651f16cf --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_0_step_019999_sample00.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b70b5cfb1fece482285a85a5e529605e099e673ee4af1c7a451c5d00fc3237e2 +size 1684478 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_0_step_019999_sample01.png b/output/lora_rank_128_mlp_H200/step_vis/test_0_step_019999_sample01.png new file mode 100644 index 0000000000000000000000000000000000000000..b1b969e65197c0d3f258caad694b3bd45b0524ef --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_0_step_019999_sample01.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:011e6fd87297640f5086205be6f5fc50db2a38ed5e39afa80cf8f6c451020a32 +size 1604570 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_0_step_019999_sample02.png b/output/lora_rank_128_mlp_H200/step_vis/test_0_step_019999_sample02.png new file mode 100644 index 0000000000000000000000000000000000000000..1bb278ae253348d3aa7318b6fce71a88b5e4c85a --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_0_step_019999_sample02.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9051f845a1aa0eba38e979de981ca914a46252100ac305ab37315f47f5c27170 +size 1660364 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_0_step_019999_sample03.png b/output/lora_rank_128_mlp_H200/step_vis/test_0_step_019999_sample03.png new file mode 100644 index 0000000000000000000000000000000000000000..69a1cb3dd7b680131befc9874579ab6d7528dea5 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_0_step_019999_sample03.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bd5009b4fb02bd285af01357aad91f5bc45257ece5c59d32f626cf20c11f28ef +size 1735527 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_0_step_019999_sample04.png b/output/lora_rank_128_mlp_H200/step_vis/test_0_step_019999_sample04.png new file mode 100644 index 0000000000000000000000000000000000000000..d2505c903fb2cc7c520674dd3fa12d71afd1c9f3 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_0_step_019999_sample04.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b083fff6c8d58e04f3f0477df75973426cec4fda31aae69543256c3bf13811e9 +size 1978291 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_1_step_000000.png b/output/lora_rank_128_mlp_H200/step_vis/test_1_step_000000.png new file mode 100644 index 0000000000000000000000000000000000000000..4bdf0d352793886497f08b93a7ec39e1b8d216d5 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_1_step_000000.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:33b972279b4e2b306bc03b3c9a728cb2fffa453194e3fc50eabc431db4d02da7 +size 1951721 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_1_step_000000_sample00.png b/output/lora_rank_128_mlp_H200/step_vis/test_1_step_000000_sample00.png new file mode 100644 index 0000000000000000000000000000000000000000..18143855741b9ff0eb66dcdc623962bf6d24132f --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_1_step_000000_sample00.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6c480d29e6234bf920d722b0b55387611d8391c8a287b4329049082fba9082bf +size 2650894 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_1_step_000000_sample01.png b/output/lora_rank_128_mlp_H200/step_vis/test_1_step_000000_sample01.png new file mode 100644 index 0000000000000000000000000000000000000000..23bdab41f42e9f3a85533a374b0f6b57c75f1893 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_1_step_000000_sample01.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:268feab4b8a3df71d0c95c05fdc2092eb764227f306d492bea3a20264788ca8e +size 2720838 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_1_step_000000_sample02.png b/output/lora_rank_128_mlp_H200/step_vis/test_1_step_000000_sample02.png new file mode 100644 index 0000000000000000000000000000000000000000..ba3860960777dbda0ba62ae65e7fb1d16e61b8ba --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_1_step_000000_sample02.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f4196ed2bb7309c77909746346404e6c0b6ad3b718dd14cc916cb5a0d05f857d +size 2732215 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_1_step_000000_sample03.png b/output/lora_rank_128_mlp_H200/step_vis/test_1_step_000000_sample03.png new file mode 100644 index 0000000000000000000000000000000000000000..7f71e5c37acf281d31f98a6411290fd7345d9d4a --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_1_step_000000_sample03.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e8c665febe301a58484227fe2d834fcff2f589db5930dfad9df7d15dfa9e05b3 +size 2754334 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_1_step_000000_sample04.png b/output/lora_rank_128_mlp_H200/step_vis/test_1_step_000000_sample04.png new file mode 100644 index 0000000000000000000000000000000000000000..e48109a983f9c23272e0854aff67318d690cc3c4 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_1_step_000000_sample04.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:27784cb70f03be845193dc94988bd18dca723a1b7dfbf64af847937076923f6c +size 2762934 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_1_step_002000.png b/output/lora_rank_128_mlp_H200/step_vis/test_1_step_002000.png new file mode 100644 index 0000000000000000000000000000000000000000..e6436a54e266b8e5b1b5b27e766c118d9e21692d --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_1_step_002000.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8f89ef3e547523c4a91fdf29468d2c1653996d4d088dcd28751bbd2dac3ae4ca +size 1712727 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_1_step_002000_sample00.png b/output/lora_rank_128_mlp_H200/step_vis/test_1_step_002000_sample00.png new file mode 100644 index 0000000000000000000000000000000000000000..f9eac8f4b77724439ca3e2233f6139af899cc3a3 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_1_step_002000_sample00.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f6e42b6af3d588a3547355a2f144733549cb02d7653556c04dafc07f9ad90b7d +size 1709453 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_1_step_002000_sample01.png b/output/lora_rank_128_mlp_H200/step_vis/test_1_step_002000_sample01.png new file mode 100644 index 0000000000000000000000000000000000000000..da161d49245ab82c0225d877f481286b167be03e --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_1_step_002000_sample01.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:605ff9b87f4464c10594519eaf00daeb6f916f4b080b27dd37e3fbc8be7a58d0 +size 1707916 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_1_step_002000_sample02.png b/output/lora_rank_128_mlp_H200/step_vis/test_1_step_002000_sample02.png new file mode 100644 index 0000000000000000000000000000000000000000..5385f64213d9659d2608fdf2c458e01e461f52a6 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_1_step_002000_sample02.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2ccdcd4a32d1eed66c4c5ddee1ea23290f34f8fc02639812d1267e490939ddaa +size 1722962 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_1_step_002000_sample03.png b/output/lora_rank_128_mlp_H200/step_vis/test_1_step_002000_sample03.png new file mode 100644 index 0000000000000000000000000000000000000000..879d01de8672efdb98d45f1883857e19b94b42c6 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_1_step_002000_sample03.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5097ce789e0b0232cb4fe9f795be5e973e558b0498e977790662eeff885e8ca0 +size 1838546 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_1_step_002000_sample04.png b/output/lora_rank_128_mlp_H200/step_vis/test_1_step_002000_sample04.png new file mode 100644 index 0000000000000000000000000000000000000000..59877ff6d2290b36b9c5a4335619a2e7193dee03 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_1_step_002000_sample04.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d79fcd346f173c3d4a5c63e0c98cffe4a9b20e887d91e5ec33a6ea9519462aa0 +size 1784596 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_1_step_004000.png b/output/lora_rank_128_mlp_H200/step_vis/test_1_step_004000.png new file mode 100644 index 0000000000000000000000000000000000000000..bf420ee053f65fceec07921e878dcf9a7d1f47c1 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_1_step_004000.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ac3bea088c332c3e8bb438dd921e293a6c32785a6d06edd35bd238e86f1ac37c +size 1719138 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_1_step_004000_sample00.png b/output/lora_rank_128_mlp_H200/step_vis/test_1_step_004000_sample00.png new file mode 100644 index 0000000000000000000000000000000000000000..d833595cc59aa9b43886ccdb7b3dc6ab2cb843af --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_1_step_004000_sample00.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5b136ea9956224988d649ec2625a2a3f9075a402d33b6141245a01bc3ff5093a +size 1942756 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_1_step_004000_sample01.png b/output/lora_rank_128_mlp_H200/step_vis/test_1_step_004000_sample01.png new file mode 100644 index 0000000000000000000000000000000000000000..694dd4e0f9dfde5c23c42c5d7222c9094e3f4d68 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_1_step_004000_sample01.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:63591bb52ab142f654d2022ebb60fde9b704f81302ddd7acaa56ff0f6675cffa +size 1567907 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_1_step_004000_sample02.png b/output/lora_rank_128_mlp_H200/step_vis/test_1_step_004000_sample02.png new file mode 100644 index 0000000000000000000000000000000000000000..c0ea25ff57f84d32579fdf1ce009aecf0e8a3ddd --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_1_step_004000_sample02.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d5f9d190610f1f165022b95d914c0e88dab43aacb0bcc97d0e7bfb4a582f8a0a +size 1782557 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_1_step_004000_sample03.png b/output/lora_rank_128_mlp_H200/step_vis/test_1_step_004000_sample03.png new file mode 100644 index 0000000000000000000000000000000000000000..7ef3fc557473c4734bd8fce554f0fdc228761d19 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_1_step_004000_sample03.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:df562f15438cdc66f42439ce1774c844723393acfbd7e8628dff80f0372010c5 +size 1752937 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_1_step_004000_sample04.png b/output/lora_rank_128_mlp_H200/step_vis/test_1_step_004000_sample04.png new file mode 100644 index 0000000000000000000000000000000000000000..bbbfbabd41b28bb4e227cb2414c1c7be9aabeeb2 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_1_step_004000_sample04.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7f25dce8711ff68785724940706f1697a53e397d9955650cbbc98132d89dceb0 +size 1932127 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_1_step_006000.png b/output/lora_rank_128_mlp_H200/step_vis/test_1_step_006000.png new file mode 100644 index 0000000000000000000000000000000000000000..cc62b7942c1f39c35bc95d1baa245ebdb152161d --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_1_step_006000.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7051efb16a6a9dcb28b888e21c166f906bacff8702009d37ab5f808860b9c502 +size 1730034 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_1_step_006000_sample00.png b/output/lora_rank_128_mlp_H200/step_vis/test_1_step_006000_sample00.png new file mode 100644 index 0000000000000000000000000000000000000000..2afa707661f9814921342a1d1688a958a71a7e9f --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_1_step_006000_sample00.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:61c0e698f0a001d7fa5593e87b9b65330e51f7cf362c356a10727e8c4f9b0970 +size 1784375 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_1_step_006000_sample01.png b/output/lora_rank_128_mlp_H200/step_vis/test_1_step_006000_sample01.png new file mode 100644 index 0000000000000000000000000000000000000000..3c1152cf21636d10f0669e1a0b2ef9bb231e5655 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_1_step_006000_sample01.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a18147bdfd6e9131ddb1c881475f3eb110423bd8398012216b72ab51bcbe63b5 +size 1724137 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_1_step_006000_sample02.png b/output/lora_rank_128_mlp_H200/step_vis/test_1_step_006000_sample02.png new file mode 100644 index 0000000000000000000000000000000000000000..5ae9ccfd4957889fcd9c094504c8dab36a617297 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_1_step_006000_sample02.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7981daf71599490771f94715f4500dd212c198beb8ed7667e7b8b9fbf05a98c1 +size 1837584 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_1_step_006000_sample03.png b/output/lora_rank_128_mlp_H200/step_vis/test_1_step_006000_sample03.png new file mode 100644 index 0000000000000000000000000000000000000000..9ef916ec17c1fc83a6ef43afb04a514e58acc5b0 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_1_step_006000_sample03.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b769441dbf834da7374b92850ec19516bef5a3bc4eea3856e0d08b5439cde257 +size 1767069 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_1_step_006000_sample04.png b/output/lora_rank_128_mlp_H200/step_vis/test_1_step_006000_sample04.png new file mode 100644 index 0000000000000000000000000000000000000000..b7a777bb7047e464f977815345b6e64b124dcdb0 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_1_step_006000_sample04.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9daebe8cb127f492e68f5da7fdb85f007074e21e60a8a2bcea43b92ece00bf46 +size 1865103 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_1_step_008000.png b/output/lora_rank_128_mlp_H200/step_vis/test_1_step_008000.png new file mode 100644 index 0000000000000000000000000000000000000000..a80c2d9be418cf6dc6ff75d29d423eff99486dec --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_1_step_008000.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0d37bd197774881a9ea6682f7cec8b3ed4d18cf49710375249f4e80f2315f63f +size 1715367 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_1_step_008000_sample00.png b/output/lora_rank_128_mlp_H200/step_vis/test_1_step_008000_sample00.png new file mode 100644 index 0000000000000000000000000000000000000000..ae84457c416d040931c02f4c00219f4f8b2b96a0 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_1_step_008000_sample00.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:05c375907e4ad604ffbeda87d69ed094e44a68b717a5719b1f79f4114b506b92 +size 1867645 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_1_step_008000_sample01.png b/output/lora_rank_128_mlp_H200/step_vis/test_1_step_008000_sample01.png new file mode 100644 index 0000000000000000000000000000000000000000..60056aa501c7f6745044caa18ffdf6bc1043c79c --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_1_step_008000_sample01.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b631d7279607a2bf38400f0b9e38b22f35f8bcbb6c516c551e3583b5cbe9c305 +size 1706234 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_1_step_008000_sample02.png b/output/lora_rank_128_mlp_H200/step_vis/test_1_step_008000_sample02.png new file mode 100644 index 0000000000000000000000000000000000000000..ddd923e262bfa9eaf017660f62babc512dfd5f7d --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_1_step_008000_sample02.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f16ea87d72bed7e935b42abe60e8d69cb9619d94f8733c80d4b6c385c9f3f0b4 +size 1603088 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_1_step_008000_sample03.png b/output/lora_rank_128_mlp_H200/step_vis/test_1_step_008000_sample03.png new file mode 100644 index 0000000000000000000000000000000000000000..4924ab3c7e9681d3fc6b8d2cdd35f20ef7f6d29d --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_1_step_008000_sample03.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:276af96628be13ade44f3472d457b423a676b798a2c8c8d69578f434bf402c66 +size 1798443 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_1_step_008000_sample04.png b/output/lora_rank_128_mlp_H200/step_vis/test_1_step_008000_sample04.png new file mode 100644 index 0000000000000000000000000000000000000000..c6d79559122b6ddb82cb9358d66ac02e0557bce3 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_1_step_008000_sample04.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2c4e3956967ad5aed5cbc0237ac14ec2f065f2b5a25bd1999dafa97c0f991f3f +size 1800752 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_1_step_010000.png b/output/lora_rank_128_mlp_H200/step_vis/test_1_step_010000.png new file mode 100644 index 0000000000000000000000000000000000000000..77749bc3e074475fe56a563d72ed7ee99b9df80a --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_1_step_010000.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bfeeb8e3e88fb3b602206ac901b28b468c9f96dc7f0785abc6e5108bb8c02b54 +size 1715732 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_1_step_010000_sample00.png b/output/lora_rank_128_mlp_H200/step_vis/test_1_step_010000_sample00.png new file mode 100644 index 0000000000000000000000000000000000000000..3d6a640c1c01e87b115b0faf5f3e02df7bc8f14e --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_1_step_010000_sample00.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fda06ea2370f74f2804db73e5727e9407b0d9f8e336b2d60940b9b29405f0439 +size 1722344 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_1_step_010000_sample01.png b/output/lora_rank_128_mlp_H200/step_vis/test_1_step_010000_sample01.png new file mode 100644 index 0000000000000000000000000000000000000000..76cbe8c1c1271a994aa9da5e98450dbc2e7e5358 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_1_step_010000_sample01.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8948e189b2a88206869e04e2f2b6bb99d25f1b45ab33e0b39bbcba5141f1bfcb +size 1772433 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_1_step_010000_sample02.png b/output/lora_rank_128_mlp_H200/step_vis/test_1_step_010000_sample02.png new file mode 100644 index 0000000000000000000000000000000000000000..b414da8008aedd9b967d60618868f13e57e269fc --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_1_step_010000_sample02.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7650603d3221e9da98ed15314d855b151020936a87f00e01a9ad3c941bb717bc +size 1679754 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_1_step_010000_sample03.png b/output/lora_rank_128_mlp_H200/step_vis/test_1_step_010000_sample03.png new file mode 100644 index 0000000000000000000000000000000000000000..38bf8ad028a7debef57cb4e87b0917f6b3a3a614 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_1_step_010000_sample03.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2eb9b1ebacd3274dbbfeb6e2f2f90453e725a798143358cc7df19906e2dd2a2c +size 1800514 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_1_step_010000_sample04.png b/output/lora_rank_128_mlp_H200/step_vis/test_1_step_010000_sample04.png new file mode 100644 index 0000000000000000000000000000000000000000..c9a9e43a1a2a2b0860faaf68d6e56ead0cb440f1 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_1_step_010000_sample04.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:338337de2cdbb2c477f6e9578f7a02a66ce691dc94326814a8554ba3a0c14606 +size 1852541 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_1_step_011999.png b/output/lora_rank_128_mlp_H200/step_vis/test_1_step_011999.png new file mode 100644 index 0000000000000000000000000000000000000000..4aa1f49b94993574f667a1082cddfab2d56b7fef --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_1_step_011999.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:91065814d1f990c117fad6a2030ddf175e02ba742f29c156be69cb4bf263c0b0 +size 1698549 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_1_step_011999_sample00.png b/output/lora_rank_128_mlp_H200/step_vis/test_1_step_011999_sample00.png new file mode 100644 index 0000000000000000000000000000000000000000..850e7c70d0b0c483492818f5abc2f6c7b1373492 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_1_step_011999_sample00.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d4c62b0b3c061ca2f3b318941d440e829163fd14ccbeab4d3c702b613e7afe90 +size 1731309 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_1_step_011999_sample01.png b/output/lora_rank_128_mlp_H200/step_vis/test_1_step_011999_sample01.png new file mode 100644 index 0000000000000000000000000000000000000000..21696aee8505e6a4ae3f3cd7f597c349376ada8e --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_1_step_011999_sample01.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:562e532ba9c6130c8e5a8a43868e1070c497d0e916826ec3c9887ef49593fa9d +size 1575744 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_1_step_011999_sample02.png b/output/lora_rank_128_mlp_H200/step_vis/test_1_step_011999_sample02.png new file mode 100644 index 0000000000000000000000000000000000000000..22a109712c93a3d7fedb842c71e631c0edcdbafc --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_1_step_011999_sample02.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2a110928aa870ede0b9fc0b3b75c31b5a236b564a2b562f241b7994d01c8e679 +size 1681837 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_1_step_011999_sample03.png b/output/lora_rank_128_mlp_H200/step_vis/test_1_step_011999_sample03.png new file mode 100644 index 0000000000000000000000000000000000000000..da95014cfd88c73cb51866bf6a0150ca2a9fd054 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_1_step_011999_sample03.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:750a97487d4c5727cf746a962d701615d0545df4c4a4b9ef827438988108dee3 +size 1783447 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_1_step_011999_sample04.png b/output/lora_rank_128_mlp_H200/step_vis/test_1_step_011999_sample04.png new file mode 100644 index 0000000000000000000000000000000000000000..e71daf69754cee5fd800e73bed4ec454673b403d --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_1_step_011999_sample04.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:978a77571c88bb4ef39126e6e883b495e90bf68f1d82dc68b17ea7d43cc2bd11 +size 1703375 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_1_step_013999.png b/output/lora_rank_128_mlp_H200/step_vis/test_1_step_013999.png new file mode 100644 index 0000000000000000000000000000000000000000..43de266d78c07d6128f3b67da8b6a49306fd117a --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_1_step_013999.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:13dffcfab951aca200ba92d19b48f23a524a896eaabb54a05ec3bb0eb1623093 +size 1717475 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_1_step_013999_sample00.png b/output/lora_rank_128_mlp_H200/step_vis/test_1_step_013999_sample00.png new file mode 100644 index 0000000000000000000000000000000000000000..cf91bc3998ff22b780b09965fba5d4022eab7998 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_1_step_013999_sample00.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8cf94368d20a968d74627c375e8229fe7d93c6128a847c0c6970334b78ddc238 +size 1784540 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_1_step_013999_sample01.png b/output/lora_rank_128_mlp_H200/step_vis/test_1_step_013999_sample01.png new file mode 100644 index 0000000000000000000000000000000000000000..c397aa6939551415677769b54fd6b83433fff481 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_1_step_013999_sample01.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:82f3dcd12d4d18691c4f5ca57421e1d5aa6575cda34a2a7f109a18bbd1eebc15 +size 1780045 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_1_step_013999_sample02.png b/output/lora_rank_128_mlp_H200/step_vis/test_1_step_013999_sample02.png new file mode 100644 index 0000000000000000000000000000000000000000..f79aafe323ec88004abe791ae43e8fe1e42602f4 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_1_step_013999_sample02.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:42d186d2babeb5e2afd86820628361e5f0fdd2059ea6930b36cd8525cbd937f0 +size 1598591 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_1_step_013999_sample03.png b/output/lora_rank_128_mlp_H200/step_vis/test_1_step_013999_sample03.png new file mode 100644 index 0000000000000000000000000000000000000000..36c041c687d19fb4fb179ef20bb8d41eb4e7d9ee --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_1_step_013999_sample03.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4aa3a340d74d9abf8805033a0888af461c46e2fa418973781580ea1c1823c52a +size 1804050 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_1_step_013999_sample04.png b/output/lora_rank_128_mlp_H200/step_vis/test_1_step_013999_sample04.png new file mode 100644 index 0000000000000000000000000000000000000000..0cddabde09da0bf27646253987091852a9c7d143 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_1_step_013999_sample04.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f45b4cee613766458acdbb40f3f79bd5ece3e4863638a4eeb42f0273624a2864 +size 1774892 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_1_step_015999.png b/output/lora_rank_128_mlp_H200/step_vis/test_1_step_015999.png new file mode 100644 index 0000000000000000000000000000000000000000..913d7040be31ec8fb518b1dc718ca0a3814d5779 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_1_step_015999.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5beddf4a020cba39dbcd53ecd01334ab2897174960b8327991eed5539df43404 +size 1711178 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_1_step_015999_sample00.png b/output/lora_rank_128_mlp_H200/step_vis/test_1_step_015999_sample00.png new file mode 100644 index 0000000000000000000000000000000000000000..aab50e9f3a6fdeee20c0140de29f017f1a734b27 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_1_step_015999_sample00.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4757dbc48159fce8448752b03279000270f56b61a0ea74b179eb1e8f59b002d7 +size 1704457 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_1_step_015999_sample01.png b/output/lora_rank_128_mlp_H200/step_vis/test_1_step_015999_sample01.png new file mode 100644 index 0000000000000000000000000000000000000000..b675353d4a1a9c6f4ac2e3099185ca5c7a85b141 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_1_step_015999_sample01.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ab1ca6bc81deb848cb26bdbe66ebfaafdb10adb28b859cf6db5e5e353e1aa440 +size 1655935 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_1_step_015999_sample02.png b/output/lora_rank_128_mlp_H200/step_vis/test_1_step_015999_sample02.png new file mode 100644 index 0000000000000000000000000000000000000000..f95b3459559ba3ca1f98fd48b69c166269ef6a22 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_1_step_015999_sample02.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0950efc098b9398b6429721a2b0161ac7777352aceb068f863f1044898be6678 +size 1735718 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_1_step_015999_sample03.png b/output/lora_rank_128_mlp_H200/step_vis/test_1_step_015999_sample03.png new file mode 100644 index 0000000000000000000000000000000000000000..58983af5aa5ba5d0d654a49719d851744fce33dc --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_1_step_015999_sample03.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:86d74cac72c6d23dbf6689a3188ba56b85e1815cca1130132c26d4489d85ecb3 +size 1836265 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_1_step_015999_sample04.png b/output/lora_rank_128_mlp_H200/step_vis/test_1_step_015999_sample04.png new file mode 100644 index 0000000000000000000000000000000000000000..67b0cde4663eab27706bb71dd09270d6c95d6fb0 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_1_step_015999_sample04.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5273bcd97f169192386e6b458b07c43f60f436a3ca12a1a3d12204665c2da8a3 +size 1846241 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_1_step_017999.png b/output/lora_rank_128_mlp_H200/step_vis/test_1_step_017999.png new file mode 100644 index 0000000000000000000000000000000000000000..842d75c314652d96c3a4851dea121824f3178a0c --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_1_step_017999.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6d4c05c5399b51d387fc9185d22042f534e5f83505216ac7c3c6c8dda5fb87e6 +size 1717509 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_1_step_017999_sample00.png b/output/lora_rank_128_mlp_H200/step_vis/test_1_step_017999_sample00.png new file mode 100644 index 0000000000000000000000000000000000000000..1d5a97eb7457c7f172fcd52709d4baf5b0ff8f41 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_1_step_017999_sample00.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a188daf23916ab2a0d00c77979b959fbea72ab31cceffa50eec8110894cb0d76 +size 1737480 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_1_step_017999_sample01.png b/output/lora_rank_128_mlp_H200/step_vis/test_1_step_017999_sample01.png new file mode 100644 index 0000000000000000000000000000000000000000..f24dfcd972cba50c7b0d9e4ef3c743a533fa9beb --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_1_step_017999_sample01.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a908ecb97a8ac0529a7609cb961857cd778fb9f2cdb8ca72b3d1e6082055715b +size 1762403 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_1_step_017999_sample02.png b/output/lora_rank_128_mlp_H200/step_vis/test_1_step_017999_sample02.png new file mode 100644 index 0000000000000000000000000000000000000000..c62f272ec6a49c48e61631943be8e7c8afe8ff5f --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_1_step_017999_sample02.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1449c0a30eba8a2608f0c6daeffcfdbe2286f08f8035e7647ea91d8cd7e30bd6 +size 1797459 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_1_step_017999_sample03.png b/output/lora_rank_128_mlp_H200/step_vis/test_1_step_017999_sample03.png new file mode 100644 index 0000000000000000000000000000000000000000..bf4e4e18e445637f7c7ff685ee01f8b5fe83eeef --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_1_step_017999_sample03.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7f85abdd045eaa860b586a1101b4737402d04072d15227c98c399bcfa3446dfb +size 1734984 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_1_step_017999_sample04.png b/output/lora_rank_128_mlp_H200/step_vis/test_1_step_017999_sample04.png new file mode 100644 index 0000000000000000000000000000000000000000..9b25c378ede28e9d8f0b80d22f5ef1a7a1bcc9bd --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_1_step_017999_sample04.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b4507e9d67e47b15ee4f4f10b763647c61339ff50efec82bfabc220f2902c204 +size 1787994 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_1_step_019999.png b/output/lora_rank_128_mlp_H200/step_vis/test_1_step_019999.png new file mode 100644 index 0000000000000000000000000000000000000000..72c0ff6e3c59007864d03ecd00ccf94cfabea0b3 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_1_step_019999.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:81d7c48a0c6727c4448628afc0f89135da378ad5fc7b36457c92f89268a698cc +size 1703469 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_1_step_019999_sample00.png b/output/lora_rank_128_mlp_H200/step_vis/test_1_step_019999_sample00.png new file mode 100644 index 0000000000000000000000000000000000000000..e561eddfcf06b5e5326048852db19802a6a231ff --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_1_step_019999_sample00.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b3d8d21f0802bbbac696a7ac2262791a84b89da1d5ea496c7c141d22dfd5598b +size 1754489 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_1_step_019999_sample01.png b/output/lora_rank_128_mlp_H200/step_vis/test_1_step_019999_sample01.png new file mode 100644 index 0000000000000000000000000000000000000000..db7524fc3cfe8a904bdf89699114e764fff43e9e --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_1_step_019999_sample01.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:91f1931f58eb724d354f4a6fdde2f9236bb8737f90aff4589ccd9ba7b112808f +size 1733697 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_1_step_019999_sample02.png b/output/lora_rank_128_mlp_H200/step_vis/test_1_step_019999_sample02.png new file mode 100644 index 0000000000000000000000000000000000000000..04ce69394a95a30d71aee3a9fceb49d9e38a8ff7 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_1_step_019999_sample02.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:34ea526058f48493a2cdba24037a90f591a580970987017615559a64e0269fa8 +size 1639883 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_1_step_019999_sample03.png b/output/lora_rank_128_mlp_H200/step_vis/test_1_step_019999_sample03.png new file mode 100644 index 0000000000000000000000000000000000000000..ec21497c893284d6bc58bf93957ee6e42cc9830e --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_1_step_019999_sample03.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:51023b31e51d3f7abf8add8013c4807ad97743908dcb51fd07384f8f9d539050 +size 1783999 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_1_step_019999_sample04.png b/output/lora_rank_128_mlp_H200/step_vis/test_1_step_019999_sample04.png new file mode 100644 index 0000000000000000000000000000000000000000..f9235a8a823114b7cba25688f8266a193d252022 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_1_step_019999_sample04.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:812c6af1308a5607cc798f3e1d044570946003bdb6269f86d83045caf9798cef +size 1786750 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_2_step_000000.png b/output/lora_rank_128_mlp_H200/step_vis/test_2_step_000000.png new file mode 100644 index 0000000000000000000000000000000000000000..5c5494bbb33caa262543855040033ab4f723b6e2 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_2_step_000000.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2caa98ac05505c6e8768619f81e5753d6ace316b87aca2dc9c7626dee75033d4 +size 2086725 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_2_step_000000_sample00.png b/output/lora_rank_128_mlp_H200/step_vis/test_2_step_000000_sample00.png new file mode 100644 index 0000000000000000000000000000000000000000..dbdbd99d52ec388f265461e559408b792ede9e17 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_2_step_000000_sample00.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:944b3776f10aeb78a1d43688c0ff14066bc03a2f3b1ce61b9943f7451cd38177 +size 2649374 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_2_step_000000_sample01.png b/output/lora_rank_128_mlp_H200/step_vis/test_2_step_000000_sample01.png new file mode 100644 index 0000000000000000000000000000000000000000..430dfe4cf7a5cf365e88174c73c047a5427c6220 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_2_step_000000_sample01.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a951b055989f1150c0148a12f2748e7f3f265d42dd2a2a024050aa8dfdabc8f +size 2700936 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_2_step_000000_sample02.png b/output/lora_rank_128_mlp_H200/step_vis/test_2_step_000000_sample02.png new file mode 100644 index 0000000000000000000000000000000000000000..7d8f45536c08f8165cb76e2213b83e1f142f847a --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_2_step_000000_sample02.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6ffb17f9b9b7db03a8e9778c93dad661b1578e822b88c5f86c623bf780bf9494 +size 2711290 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_2_step_000000_sample03.png b/output/lora_rank_128_mlp_H200/step_vis/test_2_step_000000_sample03.png new file mode 100644 index 0000000000000000000000000000000000000000..3b34fe9dc8d1d95566608970f0de0db754872190 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_2_step_000000_sample03.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9b31ae663f8762e501c2dc7ce68c25a8786a6d383cc60801ca03ada0b74d465d +size 2753903 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_2_step_000000_sample04.png b/output/lora_rank_128_mlp_H200/step_vis/test_2_step_000000_sample04.png new file mode 100644 index 0000000000000000000000000000000000000000..bba5b6550681f82557a183a3d4129913bde14dbd --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_2_step_000000_sample04.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c8b878fa18b32736ba02f3219be2f4f39dfe54ef024edb0a51aab81a7292a15a +size 2702732 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_2_step_002000.png b/output/lora_rank_128_mlp_H200/step_vis/test_2_step_002000.png new file mode 100644 index 0000000000000000000000000000000000000000..8f2cb446c4e3d9edb17da0b5a43102ae5fbf6332 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_2_step_002000.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ff022e91812662eeaf9ef5afc0d5c8cebaad1b6468f1034c72988369591911a2 +size 1873451 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_2_step_002000_sample00.png b/output/lora_rank_128_mlp_H200/step_vis/test_2_step_002000_sample00.png new file mode 100644 index 0000000000000000000000000000000000000000..3dfaf6fc267f4e6f097f2f84ce495146956c65c5 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_2_step_002000_sample00.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8a7136d7f45c81b213e0cfe11e887defd0dbdd773dd571818b50455ee6e6e96f +size 2002983 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_2_step_002000_sample01.png b/output/lora_rank_128_mlp_H200/step_vis/test_2_step_002000_sample01.png new file mode 100644 index 0000000000000000000000000000000000000000..3dd1b5e0cb60f5e2aeae2129bcb4b96c0b69ee25 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_2_step_002000_sample01.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:224a5e3ea933e99ee637061544fbcdd085bda26bbe6923108d559f7cbeb042ce +size 1616432 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_2_step_002000_sample02.png b/output/lora_rank_128_mlp_H200/step_vis/test_2_step_002000_sample02.png new file mode 100644 index 0000000000000000000000000000000000000000..c0242fc3d7c0e74624a73d41f19eaeb38d3757e2 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_2_step_002000_sample02.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d72ffaa591481dfa13dd050029478f6db45de6effa7c9461712af23803d2be7d +size 2030541 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_2_step_002000_sample03.png b/output/lora_rank_128_mlp_H200/step_vis/test_2_step_002000_sample03.png new file mode 100644 index 0000000000000000000000000000000000000000..2c8eded52fc11530c623913d7b5720d880cf428a --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_2_step_002000_sample03.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:21a68a6e7963cf9468031a20268a33b9d6f5ea7e6c3a52378d68bd8584b7204f +size 1702912 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_2_step_002000_sample04.png b/output/lora_rank_128_mlp_H200/step_vis/test_2_step_002000_sample04.png new file mode 100644 index 0000000000000000000000000000000000000000..cf57fd25a13b3d6193a68a620f6282d25c09f4d1 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_2_step_002000_sample04.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f1eb88ad526c8648ed19e5af0b4df677f22b1433e23bae2f666d603fa17ae154 +size 1943501 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_2_step_004000.png b/output/lora_rank_128_mlp_H200/step_vis/test_2_step_004000.png new file mode 100644 index 0000000000000000000000000000000000000000..63469d277c8c72452d3fa17850a1fbe9f26181dd --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_2_step_004000.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6e521f1aba6ba7527ad2aa35f8ac845f62f122dc8f508ef452e5fa1b7ed41925 +size 1871171 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_2_step_004000_sample00.png b/output/lora_rank_128_mlp_H200/step_vis/test_2_step_004000_sample00.png new file mode 100644 index 0000000000000000000000000000000000000000..6673152a4d98106b9929d9b0c7556ee209ace2f5 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_2_step_004000_sample00.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e2e4d1a63336ece9c88b2b1f57a70785fcac9a2e91c78cffa323e34b1cbf9a03 +size 1838110 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_2_step_004000_sample01.png b/output/lora_rank_128_mlp_H200/step_vis/test_2_step_004000_sample01.png new file mode 100644 index 0000000000000000000000000000000000000000..a8546c877726b0de4b0cb8b2d70af78621e055c2 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_2_step_004000_sample01.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f088f21055277f19a7775c1aff5784f08b623191ebb0717fc717cc795ca7c1df +size 1649274 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_2_step_004000_sample02.png b/output/lora_rank_128_mlp_H200/step_vis/test_2_step_004000_sample02.png new file mode 100644 index 0000000000000000000000000000000000000000..911adcb248ac6ebfef6c1cfa4e5f27283a95b4e3 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_2_step_004000_sample02.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:37ee07e25931293409a1060b397a9fdc776977ba2d4836eb9d376de2c1feaaf0 +size 1972463 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_2_step_004000_sample03.png b/output/lora_rank_128_mlp_H200/step_vis/test_2_step_004000_sample03.png new file mode 100644 index 0000000000000000000000000000000000000000..b22c75125153715f7f987c8c07600ada20816aae --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_2_step_004000_sample03.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ae3bb18095e81c0dd982e65c4ec4580ced836f83e800d5b3698b9da3f527275f +size 1796949 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_2_step_004000_sample04.png b/output/lora_rank_128_mlp_H200/step_vis/test_2_step_004000_sample04.png new file mode 100644 index 0000000000000000000000000000000000000000..2eaebacab7f865a8e65f5ad665356fc06be16a96 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_2_step_004000_sample04.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2c98ed7c8039e6bbf2b51b2c20bfe8e0d958c6dfd37e22397f0223d9a3cc006b +size 1748753 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_2_step_006000.png b/output/lora_rank_128_mlp_H200/step_vis/test_2_step_006000.png new file mode 100644 index 0000000000000000000000000000000000000000..d18d8400f44a679d1a8dfd62c39b341f26b84c1e --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_2_step_006000.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:88409eb969e888aa21665a1ff901badad7ffc3e41742a1cbeaf1fae82401d23e +size 1877745 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_2_step_006000_sample00.png b/output/lora_rank_128_mlp_H200/step_vis/test_2_step_006000_sample00.png new file mode 100644 index 0000000000000000000000000000000000000000..e294d788e9bfb50606037f1135d0482e012b8857 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_2_step_006000_sample00.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7a5032f36c6d723f047ff7030eb209656d280c82b62119440ff492a6751feea5 +size 1812519 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_2_step_006000_sample01.png b/output/lora_rank_128_mlp_H200/step_vis/test_2_step_006000_sample01.png new file mode 100644 index 0000000000000000000000000000000000000000..f5ec36b6714bc4a6ddf98e67c1e31c40158f2e8c --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_2_step_006000_sample01.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:647c4bb60aeaeda649e05ae1f397b6a83257a532d87afda44031ad14f44834c3 +size 1674567 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_2_step_006000_sample02.png b/output/lora_rank_128_mlp_H200/step_vis/test_2_step_006000_sample02.png new file mode 100644 index 0000000000000000000000000000000000000000..190e71d6e8da3cfbfeeaab1fb7e0de752c98c9f9 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_2_step_006000_sample02.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aafa8a139e50904c0ae512fb820e20ce1641e67a517f44804b0d6f545555a5ce +size 2088121 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_2_step_006000_sample03.png b/output/lora_rank_128_mlp_H200/step_vis/test_2_step_006000_sample03.png new file mode 100644 index 0000000000000000000000000000000000000000..98cb8ff04b64eeb9cedf3ca3540c3c7d3ecf5831 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_2_step_006000_sample03.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f9d855e5293cf23b5ffe697de87268428d7fbe5fe560051ea04b38700a0280d5 +size 1847373 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_2_step_006000_sample04.png b/output/lora_rank_128_mlp_H200/step_vis/test_2_step_006000_sample04.png new file mode 100644 index 0000000000000000000000000000000000000000..0420968ae74ea03ad602843e66a8c6822b350126 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_2_step_006000_sample04.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0a6d8076ca9c584099d63a81726ac8d9fd7d804bd55b66e28b9420463b12e855 +size 1842192 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_2_step_008000.png b/output/lora_rank_128_mlp_H200/step_vis/test_2_step_008000.png new file mode 100644 index 0000000000000000000000000000000000000000..5f2fbe59264db7599cd47d419241730a3fa5c029 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_2_step_008000.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5eaf3663a92361a8210ac941bec8874f6d023d693df553f7634199f2c6e9f017 +size 1877109 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_2_step_008000_sample00.png b/output/lora_rank_128_mlp_H200/step_vis/test_2_step_008000_sample00.png new file mode 100644 index 0000000000000000000000000000000000000000..79ba788e8e1da8258c74d18875fc3ea56d2ce944 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_2_step_008000_sample00.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4bb5ab4d8db6c2eb0cbad8a52981a911cf205172fcd3f679a90d36f25da6d0b9 +size 1808953 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_2_step_008000_sample01.png b/output/lora_rank_128_mlp_H200/step_vis/test_2_step_008000_sample01.png new file mode 100644 index 0000000000000000000000000000000000000000..29e9383239366b7f7729cecf863fbbdab28e8950 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_2_step_008000_sample01.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a2832292e19a3e07b95fa43445d08eb14434018ec4df9145dd35c657c43b0d09 +size 1745876 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_2_step_008000_sample02.png b/output/lora_rank_128_mlp_H200/step_vis/test_2_step_008000_sample02.png new file mode 100644 index 0000000000000000000000000000000000000000..eb6fe13cf91e6d0429022f963d897fabdcf003a9 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_2_step_008000_sample02.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dde1a235f1e208f42c9a4eaa831effb2f4b300f48083d0260cce016ed4565ecb +size 1858976 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_2_step_008000_sample03.png b/output/lora_rank_128_mlp_H200/step_vis/test_2_step_008000_sample03.png new file mode 100644 index 0000000000000000000000000000000000000000..6940e0ad6e2814bcda9ea146ccd4a9e6c8b72ebb --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_2_step_008000_sample03.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8675fcc321b9087f56e610ef7876f053541c1187f3b465d59168ffca936945bc +size 1820553 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_2_step_008000_sample04.png b/output/lora_rank_128_mlp_H200/step_vis/test_2_step_008000_sample04.png new file mode 100644 index 0000000000000000000000000000000000000000..4d49d4fefd8da3ed2826168dfdad006d380aeb14 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_2_step_008000_sample04.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:806cbd8ed78a2e33559aab317d2c81d03c8c620e149eacc2d83b1b6535629ca0 +size 1780042 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_2_step_010000.png b/output/lora_rank_128_mlp_H200/step_vis/test_2_step_010000.png new file mode 100644 index 0000000000000000000000000000000000000000..b1d53b1fbe5c7c2737c96b467aacf12ef791a6d5 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_2_step_010000.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:529ec8c742503e9634af9d7a96216c8836c89cafa638371c2abc7683b85feba4 +size 1869043 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_2_step_010000_sample00.png b/output/lora_rank_128_mlp_H200/step_vis/test_2_step_010000_sample00.png new file mode 100644 index 0000000000000000000000000000000000000000..8bf539edc41b8798da53d971771ed49d61f42bcc --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_2_step_010000_sample00.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2ab901f0c74b424e5db893c2948c5a0ef245f96d3a18535842717fcee59985c8 +size 1739855 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_2_step_010000_sample01.png b/output/lora_rank_128_mlp_H200/step_vis/test_2_step_010000_sample01.png new file mode 100644 index 0000000000000000000000000000000000000000..ff7b8656aa77eefd5a19876b7b61f91a730d42e8 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_2_step_010000_sample01.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:11b53dd14cedafb30031d7007ffa4fadda390126a71f9c1b53f33a3d96a3de53 +size 1685659 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_2_step_010000_sample02.png b/output/lora_rank_128_mlp_H200/step_vis/test_2_step_010000_sample02.png new file mode 100644 index 0000000000000000000000000000000000000000..2530bf9c7642220f4b9ef5697384ece025d957bd --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_2_step_010000_sample02.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ec8f5251ba691b01560b8864ae8a7c9e6b2e94c68d8a5a4e4bbd8c0b6e9921d7 +size 1975297 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_2_step_010000_sample03.png b/output/lora_rank_128_mlp_H200/step_vis/test_2_step_010000_sample03.png new file mode 100644 index 0000000000000000000000000000000000000000..cb0dfd46403e6be3bfa938194cdf15975f14d282 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_2_step_010000_sample03.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f5c1cfec1c2b6283362edb96aafd2a98b25b75c71d5a6599e38f8fc21ecca74e +size 1776810 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_2_step_010000_sample04.png b/output/lora_rank_128_mlp_H200/step_vis/test_2_step_010000_sample04.png new file mode 100644 index 0000000000000000000000000000000000000000..fa30859b208b09f7178d62600683173a8404bad1 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_2_step_010000_sample04.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c396579a17bc06097ccbac3576f707206d0a30b81b45c10a5aa9cf3bb5cd7970 +size 1856608 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_2_step_011999.png b/output/lora_rank_128_mlp_H200/step_vis/test_2_step_011999.png new file mode 100644 index 0000000000000000000000000000000000000000..3135dbf4527747b8d192bbb3f0a32668201e045f --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_2_step_011999.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9f1c2bc9302e82c085ed96e3f9a174e78a6ca0ccb8837a5769d5c4b1d111a95e +size 1861354 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_2_step_011999_sample00.png b/output/lora_rank_128_mlp_H200/step_vis/test_2_step_011999_sample00.png new file mode 100644 index 0000000000000000000000000000000000000000..016727cdf6f3f4bbade6eb079564eb5d419de8c7 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_2_step_011999_sample00.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7b67b1831e4d95e06182bfe470b568d6fc3ec4d6d9a100ec74088512da93c2ff +size 1805547 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_2_step_011999_sample01.png b/output/lora_rank_128_mlp_H200/step_vis/test_2_step_011999_sample01.png new file mode 100644 index 0000000000000000000000000000000000000000..461e867dfd85e3e891ae8adeec2aa6352afa0e45 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_2_step_011999_sample01.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a70ed49ef4503b7b7698baaacf246e94446a788a2e24b707d222264bfee5b58b +size 1563120 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_2_step_011999_sample02.png b/output/lora_rank_128_mlp_H200/step_vis/test_2_step_011999_sample02.png new file mode 100644 index 0000000000000000000000000000000000000000..8bb5b450b2fe1a7f8bb4ee42767eba399dcc1567 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_2_step_011999_sample02.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:765cb348782f6587e514273ead29eb11c40e8384f7f535a0691067fe622132a0 +size 1927328 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_2_step_011999_sample03.png b/output/lora_rank_128_mlp_H200/step_vis/test_2_step_011999_sample03.png new file mode 100644 index 0000000000000000000000000000000000000000..69bb66878ed04612ef192fee79a24a44f15bb650 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_2_step_011999_sample03.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f5f9c9f0ec3d2a255ab6266d53228e7f54d74a77fee9eed80d8f52680bf0ce1b +size 1950551 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_2_step_011999_sample04.png b/output/lora_rank_128_mlp_H200/step_vis/test_2_step_011999_sample04.png new file mode 100644 index 0000000000000000000000000000000000000000..343a3e37e51e94421c287acb409c2602fdddf778 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_2_step_011999_sample04.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:39dafd23e2a3415e418d9e2cf20dacd752a8b48d7bd784d00e3cd1b962104c20 +size 1938427 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_2_step_013999.png b/output/lora_rank_128_mlp_H200/step_vis/test_2_step_013999.png new file mode 100644 index 0000000000000000000000000000000000000000..22c96925a03d183f66d532f90e854c3ada1e6585 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_2_step_013999.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f6350dcf3d6c73a17681665931a7ebab0a5262784bf736f0e261049e701b8f26 +size 1875907 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_2_step_013999_sample00.png b/output/lora_rank_128_mlp_H200/step_vis/test_2_step_013999_sample00.png new file mode 100644 index 0000000000000000000000000000000000000000..22a7c18904d7a43dd6e7cdfe3d87243a226640a5 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_2_step_013999_sample00.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f91408694ade3952ea4ccadade919681ea1f8ef73f41c361e8fecc7635a06072 +size 1837043 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_2_step_013999_sample01.png b/output/lora_rank_128_mlp_H200/step_vis/test_2_step_013999_sample01.png new file mode 100644 index 0000000000000000000000000000000000000000..8e98b78dfba7365abac6ee874c373edc6c659870 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_2_step_013999_sample01.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3db5c894c0f501bdd1e560c5445026bbe47dee542b0613497d96403e2e9a67a4 +size 1782694 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_2_step_013999_sample02.png b/output/lora_rank_128_mlp_H200/step_vis/test_2_step_013999_sample02.png new file mode 100644 index 0000000000000000000000000000000000000000..820cf52ef9f77e9b87dabf18a503c7de71d0e33e --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_2_step_013999_sample02.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2e5794742c1a8536a9c3093304bbd5fe93583a3012e920ff8beee9a128e93743 +size 1925753 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_2_step_013999_sample03.png b/output/lora_rank_128_mlp_H200/step_vis/test_2_step_013999_sample03.png new file mode 100644 index 0000000000000000000000000000000000000000..79064e0f00d786640f5133cb02ce924c0804c356 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_2_step_013999_sample03.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:16b25f5eddede452a3cb29b3f68a27e39c963fb53e4091493e79ea9382b8a412 +size 1845302 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_2_step_013999_sample04.png b/output/lora_rank_128_mlp_H200/step_vis/test_2_step_013999_sample04.png new file mode 100644 index 0000000000000000000000000000000000000000..224658fe6900b0ff8fe65dc0ddb6f3bcfe41a29c --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_2_step_013999_sample04.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:427a4c8b69bf74247537e54324169ea634031a103d800491ab7caa5b3ac557e6 +size 1864397 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_2_step_015999.png b/output/lora_rank_128_mlp_H200/step_vis/test_2_step_015999.png new file mode 100644 index 0000000000000000000000000000000000000000..73ae7707c45da63634a4c7ca8cca130217e4e0cd --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_2_step_015999.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a62c5260843d0d45bc09b462f7cf14427ea051bbf6f3be0b996e78afc8c194fe +size 1875207 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_2_step_015999_sample00.png b/output/lora_rank_128_mlp_H200/step_vis/test_2_step_015999_sample00.png new file mode 100644 index 0000000000000000000000000000000000000000..2d1564b76b5b6512781a2c278b58c25efafa5180 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_2_step_015999_sample00.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7a8d68761d92f14660e3c3fe95bd43a37ae3a7173018a098bdbc732ce94a5480 +size 1790845 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_2_step_015999_sample01.png b/output/lora_rank_128_mlp_H200/step_vis/test_2_step_015999_sample01.png new file mode 100644 index 0000000000000000000000000000000000000000..4fa9806cd7cbb9f8d214481b25d274033e17c85e --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_2_step_015999_sample01.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bc85b83d6754c094231b70bc6308ae191b594f1912956659c8c6a3d8e87d4e17 +size 1674565 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_2_step_015999_sample02.png b/output/lora_rank_128_mlp_H200/step_vis/test_2_step_015999_sample02.png new file mode 100644 index 0000000000000000000000000000000000000000..8f2419f71657178bd302fcfa87a405e68b7c5341 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_2_step_015999_sample02.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:90eccbf3b881f891c296dcea6f5e7fdb14fa26baa80f639f4024ee90d669b51b +size 2020746 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_2_step_015999_sample03.png b/output/lora_rank_128_mlp_H200/step_vis/test_2_step_015999_sample03.png new file mode 100644 index 0000000000000000000000000000000000000000..7c4620c2f8074f76a61109493aaa8caed058de6a --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_2_step_015999_sample03.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:14193743a821f278bcdc99657e001271c71259e4b841ee1de2c8f4a733582f06 +size 1832980 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_2_step_015999_sample04.png b/output/lora_rank_128_mlp_H200/step_vis/test_2_step_015999_sample04.png new file mode 100644 index 0000000000000000000000000000000000000000..aa7b0733723e50c41974bf79c30c3537204175c5 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_2_step_015999_sample04.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:10b5f5fd8875059ff9c29bbadd979925e148e1522e990b8c7dcbc5b354f9857e +size 1866521 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_2_step_017999.png b/output/lora_rank_128_mlp_H200/step_vis/test_2_step_017999.png new file mode 100644 index 0000000000000000000000000000000000000000..fe0e21a0cfa114f451f4b841c540ceac20ea3db8 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_2_step_017999.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1ea794f66856707bdb6b9d1a79fea41046eb4f9b5334d3464aad00d955b964fe +size 1871266 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_2_step_017999_sample00.png b/output/lora_rank_128_mlp_H200/step_vis/test_2_step_017999_sample00.png new file mode 100644 index 0000000000000000000000000000000000000000..7a4347ec5358a88c816526c392b79b2c572ce34a --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_2_step_017999_sample00.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8bb52d9816ce75b9823ea2b3121bf7482dd7c086a9384053e1530bda3948cfb0 +size 1789352 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_2_step_017999_sample01.png b/output/lora_rank_128_mlp_H200/step_vis/test_2_step_017999_sample01.png new file mode 100644 index 0000000000000000000000000000000000000000..45939d5c99db6aedcbc90005834386aa64b087c7 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_2_step_017999_sample01.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e3ec3ebda7caa7d0bd6dcd90f2705a80c8c2a0ffe5053f1bf576cbfb84a865ef +size 1667440 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_2_step_017999_sample02.png b/output/lora_rank_128_mlp_H200/step_vis/test_2_step_017999_sample02.png new file mode 100644 index 0000000000000000000000000000000000000000..123da88a3d25f58033323d61eb86b4d57582fee6 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_2_step_017999_sample02.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:96527ef5c69f6f554febb30c14f146e60100ac47f5f1994d2a040dc4c723ded4 +size 2017763 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_2_step_017999_sample03.png b/output/lora_rank_128_mlp_H200/step_vis/test_2_step_017999_sample03.png new file mode 100644 index 0000000000000000000000000000000000000000..709b6be3a4c04636fb9dc10820611609b582a2a9 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_2_step_017999_sample03.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:926d5fad0635431faf7261af02fb3f1b38b7732f139933a1021ea1294def5a90 +size 1839365 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_2_step_017999_sample04.png b/output/lora_rank_128_mlp_H200/step_vis/test_2_step_017999_sample04.png new file mode 100644 index 0000000000000000000000000000000000000000..883859a0bb84c8e1edbf54b2549cfb9d86e7f208 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_2_step_017999_sample04.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f7faeadc0e30565c4b56e12b766eb912459b47f2290ed01d4c101ecd23c3350a +size 1740534 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_2_step_019999.png b/output/lora_rank_128_mlp_H200/step_vis/test_2_step_019999.png new file mode 100644 index 0000000000000000000000000000000000000000..0a676204db476c00344250bc82365e07c262e0ea --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_2_step_019999.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0f6b8c928e650c6a2546562c61409a6ec12fab7df0c6ecf4f47b78e5a76b9917 +size 1872007 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_2_step_019999_sample00.png b/output/lora_rank_128_mlp_H200/step_vis/test_2_step_019999_sample00.png new file mode 100644 index 0000000000000000000000000000000000000000..5de485cec8ffc2c8105e408a32742d03e6e5baee --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_2_step_019999_sample00.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:06b0bacfca1d3b4d7d57af4020442d340240c35d269975136e79515ff5adfcdf +size 1722033 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_2_step_019999_sample01.png b/output/lora_rank_128_mlp_H200/step_vis/test_2_step_019999_sample01.png new file mode 100644 index 0000000000000000000000000000000000000000..f6d5c2e46b4394c15055a0157c3b0b66c9cb5e5b --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_2_step_019999_sample01.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f825fd7352cd6b216dae71a619df0ac1908b7daa5983451be2ff2fa7d1ca31c8 +size 1719610 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_2_step_019999_sample02.png b/output/lora_rank_128_mlp_H200/step_vis/test_2_step_019999_sample02.png new file mode 100644 index 0000000000000000000000000000000000000000..f6d4c10437174a2be4c8fdf21503dabbd232b573 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_2_step_019999_sample02.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2816a10d1f219b07551f7b2546d3016c8144d635f17716ea865c12a0e9388249 +size 2031130 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_2_step_019999_sample03.png b/output/lora_rank_128_mlp_H200/step_vis/test_2_step_019999_sample03.png new file mode 100644 index 0000000000000000000000000000000000000000..5d132f5d852d70bd5bd47855979888c9282af040 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_2_step_019999_sample03.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:625c68626f76b8e516ea3200ffb59a666ae2f43617cff2244d1e259912d16194 +size 1783585 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_2_step_019999_sample04.png b/output/lora_rank_128_mlp_H200/step_vis/test_2_step_019999_sample04.png new file mode 100644 index 0000000000000000000000000000000000000000..a006d1c80cb630a217e3324aa6685be4e0767186 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_2_step_019999_sample04.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2755930c35bdf555b2fdacc6bf7515fef89dbee621e1324b117b2ee9b164bb8c +size 1813397 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_3_step_000000.png b/output/lora_rank_128_mlp_H200/step_vis/test_3_step_000000.png new file mode 100644 index 0000000000000000000000000000000000000000..fa9681ae33dd710d156717f247e88265ef7a321c --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_3_step_000000.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3b705aa2ce5608828330420b63e1a8d2265ebab88298add8a4513a5d4c12c6b2 +size 2086264 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_3_step_000000_sample00.png b/output/lora_rank_128_mlp_H200/step_vis/test_3_step_000000_sample00.png new file mode 100644 index 0000000000000000000000000000000000000000..a2fc6f1f670b202099d67178dd0eb278c196d324 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_3_step_000000_sample00.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c642267e21ac9b491336bb1af1e05df7b3f9d7983ea965018dde6861d33c794d +size 2686401 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_3_step_000000_sample01.png b/output/lora_rank_128_mlp_H200/step_vis/test_3_step_000000_sample01.png new file mode 100644 index 0000000000000000000000000000000000000000..e8dee96a00d4a923b6f5f335f24546baf7d029e1 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_3_step_000000_sample01.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fe8ccb2197e0c06338c8b227f85cc9d9838ac397509c4295e5689a325ef5ecec +size 2721317 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_3_step_000000_sample02.png b/output/lora_rank_128_mlp_H200/step_vis/test_3_step_000000_sample02.png new file mode 100644 index 0000000000000000000000000000000000000000..f355443020e24671ab6f43c4fd68269e21b03d55 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_3_step_000000_sample02.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:17cdf7185d1b8adaf414e2ff5b3ff33c6c0cde59d9a4acf122514abc412b21bc +size 2669468 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_3_step_000000_sample03.png b/output/lora_rank_128_mlp_H200/step_vis/test_3_step_000000_sample03.png new file mode 100644 index 0000000000000000000000000000000000000000..4dac3bf486005c08d9e3b306b4cb413de212454b --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_3_step_000000_sample03.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:15e15b0097a42f3b340face99ba0f7d24cfe9c92ea10f71cd40f0ded743f77d3 +size 2719968 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_3_step_000000_sample04.png b/output/lora_rank_128_mlp_H200/step_vis/test_3_step_000000_sample04.png new file mode 100644 index 0000000000000000000000000000000000000000..85ef566fb50584d5026fe512db036c96f89af316 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_3_step_000000_sample04.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:71cf920afaf39be1695410c386d9edfb4e09fe32b87b28a355020013f7f79fcf +size 2768922 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_3_step_002000.png b/output/lora_rank_128_mlp_H200/step_vis/test_3_step_002000.png new file mode 100644 index 0000000000000000000000000000000000000000..b9eb0f1ea7071da4ca584d084777c5d7fdee3101 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_3_step_002000.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d942659cc8887d4f375f58c9b06f3aec60a9a39200cc174edd0adf3769db5141 +size 1894772 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_3_step_002000_sample00.png b/output/lora_rank_128_mlp_H200/step_vis/test_3_step_002000_sample00.png new file mode 100644 index 0000000000000000000000000000000000000000..3ec7371801436576ce7228f32e524e7c70e5e000 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_3_step_002000_sample00.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2f7e6af550768b717b9f71fc31a460699dc323e6e68b6588d618b822d070e0f4 +size 1698877 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_3_step_002000_sample01.png b/output/lora_rank_128_mlp_H200/step_vis/test_3_step_002000_sample01.png new file mode 100644 index 0000000000000000000000000000000000000000..a456a01519e5497464ed1209619fb054e667a8df --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_3_step_002000_sample01.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:869503f0f0341a3934dc3431abbc01fe81d668a3b0fb62ea4e3707f9a5273923 +size 1804266 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_3_step_002000_sample02.png b/output/lora_rank_128_mlp_H200/step_vis/test_3_step_002000_sample02.png new file mode 100644 index 0000000000000000000000000000000000000000..59a1633f5d62ac97480daeca388a68086a6e7899 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_3_step_002000_sample02.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:84b027c3e80120b0565be00638544f21d82bf55797c6b7d4924ec22bb1d9e4bb +size 1931220 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_3_step_002000_sample03.png b/output/lora_rank_128_mlp_H200/step_vis/test_3_step_002000_sample03.png new file mode 100644 index 0000000000000000000000000000000000000000..5515715224a49c1eaa86922288287c506c2bf063 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_3_step_002000_sample03.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d5d3c80e18736875f34d6fedea1fe08f4d524eb0b6009818f4b0831b9aa860fa +size 2067371 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_3_step_002000_sample04.png b/output/lora_rank_128_mlp_H200/step_vis/test_3_step_002000_sample04.png new file mode 100644 index 0000000000000000000000000000000000000000..c1e7b7d753097b3cfd9c157e4c56c8f1bc2e404f --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_3_step_002000_sample04.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7ffb5619de458fa556f8b548f6b5fe8c4766ccd582db8bab50e44b79a3bf2832 +size 2130873 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_3_step_004000.png b/output/lora_rank_128_mlp_H200/step_vis/test_3_step_004000.png new file mode 100644 index 0000000000000000000000000000000000000000..2a444c411e3b637d80ce3185a629b911972b5fc8 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_3_step_004000.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cbbd752c89cc0d593ba01efddf8eaf6bbce392c852ceab7ba4c16ad13fda17c6 +size 1907442 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_3_step_004000_sample00.png b/output/lora_rank_128_mlp_H200/step_vis/test_3_step_004000_sample00.png new file mode 100644 index 0000000000000000000000000000000000000000..c0cef0cb302817fb818847f58606bda6867e4056 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_3_step_004000_sample00.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1aebab24298d2fe7b0c7e9256aae3f92fa1404bc279b616d35e9b984f31c459e +size 1671879 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_3_step_004000_sample01.png b/output/lora_rank_128_mlp_H200/step_vis/test_3_step_004000_sample01.png new file mode 100644 index 0000000000000000000000000000000000000000..cbd3d8fa880d0abf720c7e7b2da308d54f8449de --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_3_step_004000_sample01.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b465a124316505330ee164357629f9c79130624e6632ac87af31ddb21f76d0e8 +size 1894159 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_3_step_004000_sample02.png b/output/lora_rank_128_mlp_H200/step_vis/test_3_step_004000_sample02.png new file mode 100644 index 0000000000000000000000000000000000000000..0f9b25a765c555bb25ef317c0e7b4dc082a3152d --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_3_step_004000_sample02.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:af07db308acc013ef3c317e0ad2451f45186ba1546fdd91085e90797e60857b6 +size 2033884 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_3_step_004000_sample03.png b/output/lora_rank_128_mlp_H200/step_vis/test_3_step_004000_sample03.png new file mode 100644 index 0000000000000000000000000000000000000000..6d6af1b4d93358a279b949b456d57e3f4dd0867d --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_3_step_004000_sample03.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:73fffd58f8852ea89930450a6f4ba8b96687635a9893c5b906f0495c6c35f303 +size 2025963 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_3_step_004000_sample04.png b/output/lora_rank_128_mlp_H200/step_vis/test_3_step_004000_sample04.png new file mode 100644 index 0000000000000000000000000000000000000000..51a4725cddfccd61b6939d5af07210ae379f8644 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_3_step_004000_sample04.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:54b2f349aaad8c2914d9c1c45d117a57e15a0934f51c5dc50a1c3a0f4704cc5b +size 2005117 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_3_step_006000.png b/output/lora_rank_128_mlp_H200/step_vis/test_3_step_006000.png new file mode 100644 index 0000000000000000000000000000000000000000..e9ca683f0670cbe07eae609fccedacac043999a7 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_3_step_006000.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:74755160e626ac37d2f0aee453f3fd56167179dd62244cdbebbd4c62ffb1d687 +size 1898686 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_3_step_006000_sample00.png b/output/lora_rank_128_mlp_H200/step_vis/test_3_step_006000_sample00.png new file mode 100644 index 0000000000000000000000000000000000000000..ebd54e467e58724a6f988af9823e4c0c8b861cb1 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_3_step_006000_sample00.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:404d8d12623d230dd3e7e026c06b53574581954a13d174d422e86071bf2df32b +size 1764677 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_3_step_006000_sample01.png b/output/lora_rank_128_mlp_H200/step_vis/test_3_step_006000_sample01.png new file mode 100644 index 0000000000000000000000000000000000000000..f926197a38c9b3c47e416daad67057a7ff1c2d8a --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_3_step_006000_sample01.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5cbbd6f6b3f539c8df20d46af7440bb9e9a417ba9494183a89c96df6f22f6cc4 +size 1909736 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_3_step_006000_sample02.png b/output/lora_rank_128_mlp_H200/step_vis/test_3_step_006000_sample02.png new file mode 100644 index 0000000000000000000000000000000000000000..b42340490471b24d59530c3ebd4baa9c4f80bea6 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_3_step_006000_sample02.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:51a641475624cb17ef623370ced969f6bf15933d1ad97028f7fd500a30973b2e +size 1768330 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_3_step_006000_sample03.png b/output/lora_rank_128_mlp_H200/step_vis/test_3_step_006000_sample03.png new file mode 100644 index 0000000000000000000000000000000000000000..ee91e943d47f971d26834a1bced642f3b3d610df --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_3_step_006000_sample03.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3501fe8e3d258cd90b78d46638f363275bd3047cbaca68e8b869cfcf301e5345 +size 2178259 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_3_step_006000_sample04.png b/output/lora_rank_128_mlp_H200/step_vis/test_3_step_006000_sample04.png new file mode 100644 index 0000000000000000000000000000000000000000..a0ef92ef73e7b1451310729b3871e5f0d31f504f --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_3_step_006000_sample04.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0a83314f61c920a58d4a8a5838c5f415b5b2033bd611b9eeb830f366aec2b447 +size 1888985 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_3_step_008000.png b/output/lora_rank_128_mlp_H200/step_vis/test_3_step_008000.png new file mode 100644 index 0000000000000000000000000000000000000000..54148163697d2c433d5b800733401adaf26de618 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_3_step_008000.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:628d7e7ce137ca630a7d2eddd78f88e96548dc8c6e7779d08e35d9ed6b7113b7 +size 1890271 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_3_step_008000_sample00.png b/output/lora_rank_128_mlp_H200/step_vis/test_3_step_008000_sample00.png new file mode 100644 index 0000000000000000000000000000000000000000..6d3606e31db64592a8cc4936c576effda2f0fcda --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_3_step_008000_sample00.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:107a2ad3f91aab554cfd757bbb2cf7e7f38917547af5e86b3cc3e629558bf44a +size 1764317 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_3_step_008000_sample01.png b/output/lora_rank_128_mlp_H200/step_vis/test_3_step_008000_sample01.png new file mode 100644 index 0000000000000000000000000000000000000000..df679517befc66b58468aaf2966241f953f5b890 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_3_step_008000_sample01.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:12962714fc9a1a15280395439ca389dc401d2b38204f74d79937463af8b9a6a6 +size 1863920 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_3_step_008000_sample02.png b/output/lora_rank_128_mlp_H200/step_vis/test_3_step_008000_sample02.png new file mode 100644 index 0000000000000000000000000000000000000000..d7d84b18f2c631d0b4c25775fae821640163c73d --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_3_step_008000_sample02.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:145b72b6e9e46017807ad7caee175efc77b52e756beefdf79c2a62cf4762e310 +size 1765777 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_3_step_008000_sample03.png b/output/lora_rank_128_mlp_H200/step_vis/test_3_step_008000_sample03.png new file mode 100644 index 0000000000000000000000000000000000000000..5afc16dabd70f3fb706c5fc542127a647db55382 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_3_step_008000_sample03.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5282626dbbdc2ee4adc9e4543c641c7d81020794a7174a1f27fc846b26640ba1 +size 2063482 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_3_step_008000_sample04.png b/output/lora_rank_128_mlp_H200/step_vis/test_3_step_008000_sample04.png new file mode 100644 index 0000000000000000000000000000000000000000..b4bf169d8633631f438f117f13a4d4c5ee79e2ca --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_3_step_008000_sample04.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:052eb745f6e2ba8328e9907aa8771a6672e0c529fcb2b510e86926627bca6fca +size 1978045 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_3_step_010000.png b/output/lora_rank_128_mlp_H200/step_vis/test_3_step_010000.png new file mode 100644 index 0000000000000000000000000000000000000000..e97f6e4f957e5b385aca6c4fbdf3720d73706940 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_3_step_010000.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:01514dde468bd896c5ea33e451a7fbde1fc9e8f2b2cb3377fac92aab5ee9a90a +size 1884044 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_3_step_010000_sample00.png b/output/lora_rank_128_mlp_H200/step_vis/test_3_step_010000_sample00.png new file mode 100644 index 0000000000000000000000000000000000000000..55e941bde9fee017da7c7488cf178a056fb53b67 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_3_step_010000_sample00.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a7ce46ddbf5e91835da5c298bb0cb66748b86c9d31c171d0215eb257fab4ef5 +size 1785768 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_3_step_010000_sample01.png b/output/lora_rank_128_mlp_H200/step_vis/test_3_step_010000_sample01.png new file mode 100644 index 0000000000000000000000000000000000000000..378b2378c67d7c21c5c10e661542af8e80e299ec --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_3_step_010000_sample01.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:13d3e3177f1fae68e074595690c4d775773661ce278339f9a97dfdd93f7c1c86 +size 1882125 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_3_step_010000_sample02.png b/output/lora_rank_128_mlp_H200/step_vis/test_3_step_010000_sample02.png new file mode 100644 index 0000000000000000000000000000000000000000..798463e06ae981f78e08d379dc5846d04fb74f7e --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_3_step_010000_sample02.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2950c55f136690d4410d0da43d9dc40e0875d23d31c722526002046409831ad9 +size 1836414 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_3_step_010000_sample03.png b/output/lora_rank_128_mlp_H200/step_vis/test_3_step_010000_sample03.png new file mode 100644 index 0000000000000000000000000000000000000000..9159e448e1a1f3ad75dd2c1b7d18b56cbb60be23 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_3_step_010000_sample03.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5ee5247a13310eaaf4e891659e58fe0795305613b02f4e563de374a778041b1c +size 2013832 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_3_step_010000_sample04.png b/output/lora_rank_128_mlp_H200/step_vis/test_3_step_010000_sample04.png new file mode 100644 index 0000000000000000000000000000000000000000..4f7b2d6093e8b758d516fd55782dc06acae63172 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_3_step_010000_sample04.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:50223ac4178ebe02eb7c8fdea2f564d81e4dca9f0f3be143b9b02cbb36f5756c +size 1856500 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_3_step_011999.png b/output/lora_rank_128_mlp_H200/step_vis/test_3_step_011999.png new file mode 100644 index 0000000000000000000000000000000000000000..ef00c93d0bd8120736bf0387b0b34ca8e9664a81 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_3_step_011999.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d22ebf97165a5cb59f495074ae8da93bece845d0b6f71d1f7aed04fb89cffbb3 +size 1882877 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_3_step_011999_sample00.png b/output/lora_rank_128_mlp_H200/step_vis/test_3_step_011999_sample00.png new file mode 100644 index 0000000000000000000000000000000000000000..543c2f1efce01c413152321f26f84ea9984538ef --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_3_step_011999_sample00.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ed90cdcec90e8cf24450f6a29829d311b0c6a9564bc17e4d4620bb4c644f5de6 +size 1636406 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_3_step_011999_sample01.png b/output/lora_rank_128_mlp_H200/step_vis/test_3_step_011999_sample01.png new file mode 100644 index 0000000000000000000000000000000000000000..5601f78f2853b75672bff897814213b36dbdc362 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_3_step_011999_sample01.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:77d3395a218091b8c7aa34ad689e5cac0ed851047e749d73137c57cccd0243e6 +size 1837748 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_3_step_011999_sample02.png b/output/lora_rank_128_mlp_H200/step_vis/test_3_step_011999_sample02.png new file mode 100644 index 0000000000000000000000000000000000000000..e82409c4e93929e4177786797fd84b0145a316c9 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_3_step_011999_sample02.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:84036ecb5e27a759f47a0ae8b3836b2fa42130fe210edfee89689bf90c0ff261 +size 1812460 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_3_step_011999_sample03.png b/output/lora_rank_128_mlp_H200/step_vis/test_3_step_011999_sample03.png new file mode 100644 index 0000000000000000000000000000000000000000..382a62b8cc2a2ff9eef79bdd8662d94ba6567db9 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_3_step_011999_sample03.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6f06fb1e7327f551098746cb7514cf8cd2ea8d5ce1f1695f59784d93f89aca18 +size 2206710 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_3_step_011999_sample04.png b/output/lora_rank_128_mlp_H200/step_vis/test_3_step_011999_sample04.png new file mode 100644 index 0000000000000000000000000000000000000000..eff9e299089103036c8eecc299bf36066f9a430b --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_3_step_011999_sample04.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:36096026b838edfdc8f08c5b1788539aea2321c8e719e93d8fe6c40068e60fb7 +size 1853464 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_3_step_013999.png b/output/lora_rank_128_mlp_H200/step_vis/test_3_step_013999.png new file mode 100644 index 0000000000000000000000000000000000000000..25be21cf2e25a997ed0cb32a7656583f82592289 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_3_step_013999.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:39f0001750ed27d429901fc1ee982dc1fcb69c2b90659506a0e3be5013d4fd3d +size 1894918 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_3_step_013999_sample00.png b/output/lora_rank_128_mlp_H200/step_vis/test_3_step_013999_sample00.png new file mode 100644 index 0000000000000000000000000000000000000000..4b2208ccee073b8067ad5cdbc7f74fc06d6d9c75 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_3_step_013999_sample00.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fbfed0d69182e00421dd9a8b060317365fc7b19e46429f6984cf6e6039ce021f +size 1691335 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_3_step_013999_sample01.png b/output/lora_rank_128_mlp_H200/step_vis/test_3_step_013999_sample01.png new file mode 100644 index 0000000000000000000000000000000000000000..b8702414f16a6e262c68ac51e137c9d930cb9619 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_3_step_013999_sample01.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:47101efd1cda198515ab981b012e2fcbd17094a6c4007b038d3ad4d5b1711cd6 +size 1872457 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_3_step_013999_sample02.png b/output/lora_rank_128_mlp_H200/step_vis/test_3_step_013999_sample02.png new file mode 100644 index 0000000000000000000000000000000000000000..f4f019518ef9e518da5edfb9bebcdac626f811ce --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_3_step_013999_sample02.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a237a3bed4f9f97d349335ebfca70eca38da4fcb5c3ba5a96263c0bd750e096c +size 1920904 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_3_step_013999_sample03.png b/output/lora_rank_128_mlp_H200/step_vis/test_3_step_013999_sample03.png new file mode 100644 index 0000000000000000000000000000000000000000..89ef9863260bcbc8f4af17538a3e132123d1aba1 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_3_step_013999_sample03.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3e0c2776f75ccf74ed62bf542cfbb61a410e37120746d26aa86392a47ea64028 +size 2097032 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_3_step_013999_sample04.png b/output/lora_rank_128_mlp_H200/step_vis/test_3_step_013999_sample04.png new file mode 100644 index 0000000000000000000000000000000000000000..54d06eb8abac8a93c1881615485664b7c01b1f55 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_3_step_013999_sample04.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:47419cc9a4077a8904c5cc53ac01ff77b49a5d64f78ef680d5a7e0ef0ea305b8 +size 1904186 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_3_step_015999.png b/output/lora_rank_128_mlp_H200/step_vis/test_3_step_015999.png new file mode 100644 index 0000000000000000000000000000000000000000..533041e32f34eeba229e7aededbeeb563f782d3f --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_3_step_015999.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a5466c7164169ae39557149193ba6a7411346241be4b02f04b901a2aae9e52a7 +size 1901918 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_3_step_015999_sample00.png b/output/lora_rank_128_mlp_H200/step_vis/test_3_step_015999_sample00.png new file mode 100644 index 0000000000000000000000000000000000000000..94b6b6c7030eee9d8864e10c08ccb47041b6bd97 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_3_step_015999_sample00.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7373b9778869f156aa949e50cbd67d8ed9ce4920d96648dd42166828b29e1a13 +size 1708977 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_3_step_015999_sample01.png b/output/lora_rank_128_mlp_H200/step_vis/test_3_step_015999_sample01.png new file mode 100644 index 0000000000000000000000000000000000000000..ab6e2f3fcd556c1cb7b844cb4820879db311bd70 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_3_step_015999_sample01.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ab97fa848bdd830d97319d39d017c53e786177058358d9776027e78472a93b05 +size 1853720 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_3_step_015999_sample02.png b/output/lora_rank_128_mlp_H200/step_vis/test_3_step_015999_sample02.png new file mode 100644 index 0000000000000000000000000000000000000000..54f9d079bed14c3b99120eb52fe0b9b40680476e --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_3_step_015999_sample02.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:501880e386c5291fa9070cd8de3185a9ddc9936d649921a8cad161d5b8b6d235 +size 1897304 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_3_step_015999_sample03.png b/output/lora_rank_128_mlp_H200/step_vis/test_3_step_015999_sample03.png new file mode 100644 index 0000000000000000000000000000000000000000..81b1ff89875e8f09eab1b0275725d62e121ba335 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_3_step_015999_sample03.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bfccf87ce551025b05ebd815ff449b22a387e8dd955acbbc871b6fc012b8c89e +size 2175730 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_3_step_015999_sample04.png b/output/lora_rank_128_mlp_H200/step_vis/test_3_step_015999_sample04.png new file mode 100644 index 0000000000000000000000000000000000000000..6b815b799a2f5a1031353a224f8507f102f850d7 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_3_step_015999_sample04.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ade62ff61436eaf18e16069d52b89fa176a2d8d483834d0bb88a89f584e72c84 +size 1906819 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_3_step_017999.png b/output/lora_rank_128_mlp_H200/step_vis/test_3_step_017999.png new file mode 100644 index 0000000000000000000000000000000000000000..5f2196f5ce6632c7f2018449f09ff137ae7aa9fb --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_3_step_017999.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:874f01eb44826e67344910cff8757a1cda0efc97c4501be378551159cc1b8708 +size 1889748 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_3_step_017999_sample00.png b/output/lora_rank_128_mlp_H200/step_vis/test_3_step_017999_sample00.png new file mode 100644 index 0000000000000000000000000000000000000000..0f5e3c5f55e635ef615d03cb21f5057f627af7da --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_3_step_017999_sample00.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:591159e7674dabe6afd3a82874169319af99c68055fadcfb6a6d1f156e5083ae +size 1734171 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_3_step_017999_sample01.png b/output/lora_rank_128_mlp_H200/step_vis/test_3_step_017999_sample01.png new file mode 100644 index 0000000000000000000000000000000000000000..5ebcd3e63b3fbb80d93e9f0293054b4ba3993f2f --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_3_step_017999_sample01.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f0ee1464a054428a93b951644ce474d3e9490e8fd872f48cc37f3f456f997d01 +size 1741350 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_3_step_017999_sample02.png b/output/lora_rank_128_mlp_H200/step_vis/test_3_step_017999_sample02.png new file mode 100644 index 0000000000000000000000000000000000000000..380121b34bc13a062dd57deeea8c7fa52faf5571 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_3_step_017999_sample02.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5f00800ea79acdc75a603290c0a1c5bcb73d8b77d07684d4b09350b1df3c9de7 +size 1729957 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_3_step_017999_sample03.png b/output/lora_rank_128_mlp_H200/step_vis/test_3_step_017999_sample03.png new file mode 100644 index 0000000000000000000000000000000000000000..552f5c6ea8c78170aff60b219d04ef6266a5c85a --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_3_step_017999_sample03.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7efb688a969624e9dfa6b7ee51c721d6a9224b79d2cb7ed268b02e9d8e903ad6 +size 2170937 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_3_step_017999_sample04.png b/output/lora_rank_128_mlp_H200/step_vis/test_3_step_017999_sample04.png new file mode 100644 index 0000000000000000000000000000000000000000..e2612c67450b9c15511dbe9c477ca5ccea968051 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_3_step_017999_sample04.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8ac24c6c8b1e9d5fe0c108bb04eb0c07ca66741bf8122f6501086580282c54d2 +size 1993514 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_3_step_019999.png b/output/lora_rank_128_mlp_H200/step_vis/test_3_step_019999.png new file mode 100644 index 0000000000000000000000000000000000000000..69705b63b4d960fa29081e2f6c9181e91f0d8ef1 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_3_step_019999.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2dec51b74ab7210ea670809709d0a72425f0392398f374cdcadb5b3aa7ed9ecd +size 1885992 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_3_step_019999_sample00.png b/output/lora_rank_128_mlp_H200/step_vis/test_3_step_019999_sample00.png new file mode 100644 index 0000000000000000000000000000000000000000..9d9d6de3d1a572976c5917b0738cfbc9ab19f822 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_3_step_019999_sample00.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:82d47b6651499da061d715d5c9a96bb826532daa2898721832ae65b36175dfc9 +size 1648565 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_3_step_019999_sample01.png b/output/lora_rank_128_mlp_H200/step_vis/test_3_step_019999_sample01.png new file mode 100644 index 0000000000000000000000000000000000000000..3ca028a7c3502a62edcad71b97f156c3015287b2 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_3_step_019999_sample01.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:213baf11830cc2892298ac6301e1d133b8c0d27a1775bd447c7c10c57487bc26 +size 1740095 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_3_step_019999_sample02.png b/output/lora_rank_128_mlp_H200/step_vis/test_3_step_019999_sample02.png new file mode 100644 index 0000000000000000000000000000000000000000..0ecf7e91f1d5378a6158725a3905fc7e8e585e5c --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_3_step_019999_sample02.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:36554eb503f2e98e4e6909b5a8ed3376fb9738745f28a1e3cdeba7995c2979ad +size 1930088 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_3_step_019999_sample03.png b/output/lora_rank_128_mlp_H200/step_vis/test_3_step_019999_sample03.png new file mode 100644 index 0000000000000000000000000000000000000000..fd2ecbfa128428b5d15166f9112664a27b509e35 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_3_step_019999_sample03.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8744a654856e8cdc37070f7a9a30b5cd59eecb4fe7053b69f9eed6c86e27b19b +size 2115407 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_3_step_019999_sample04.png b/output/lora_rank_128_mlp_H200/step_vis/test_3_step_019999_sample04.png new file mode 100644 index 0000000000000000000000000000000000000000..d0cbdc51be3dc70c91bb0ce45c0d4b6595eeb57b --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_3_step_019999_sample04.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fdbbe222f916fea76c7e081530024a9f202f7c54e8d9c09cc01e996a0b1351b0 +size 1927523 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_4_step_000000.png b/output/lora_rank_128_mlp_H200/step_vis/test_4_step_000000.png new file mode 100644 index 0000000000000000000000000000000000000000..0f7601660e52847c4ece6845247d5472f6bff7ce --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_4_step_000000.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5e0cb2b90acca43fc19ccfeae7545da22d89f5252af70fc9587f484361d1e969 +size 1985262 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_4_step_000000_sample00.png b/output/lora_rank_128_mlp_H200/step_vis/test_4_step_000000_sample00.png new file mode 100644 index 0000000000000000000000000000000000000000..96dd5ac47abb262126f298524a04bed41416a344 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_4_step_000000_sample00.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3537d417210b07ed62de89b6d1b0af4899a37be3ec66f44468c3de4b08ca6dc8 +size 2753433 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_4_step_000000_sample01.png b/output/lora_rank_128_mlp_H200/step_vis/test_4_step_000000_sample01.png new file mode 100644 index 0000000000000000000000000000000000000000..aa194bae0efa4b887980642d949cac4657dcd002 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_4_step_000000_sample01.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dd3d3088539a9b05510fb9dff859fc40f8feb0d5009ce8439f0c170933974bbf +size 2735641 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_4_step_000000_sample02.png b/output/lora_rank_128_mlp_H200/step_vis/test_4_step_000000_sample02.png new file mode 100644 index 0000000000000000000000000000000000000000..11cf23a78f8a0e334c55c625334b022e82a26259 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_4_step_000000_sample02.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:05b46ad49292b7d90b4d57409e1d70e61c9d238965d9afbef68d7c4bc963e718 +size 2760544 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_4_step_000000_sample03.png b/output/lora_rank_128_mlp_H200/step_vis/test_4_step_000000_sample03.png new file mode 100644 index 0000000000000000000000000000000000000000..6dd187b0cc3723b1604bfdcf92a7a4d11339acd1 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_4_step_000000_sample03.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5928bc0a467f99927ed2d6d157b44d7c253aab54ebea0f2b655f0e1876f8bc83 +size 2715558 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_4_step_000000_sample04.png b/output/lora_rank_128_mlp_H200/step_vis/test_4_step_000000_sample04.png new file mode 100644 index 0000000000000000000000000000000000000000..497350a8cdd9a1020c2ed16bc0d881325c6a74bc --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_4_step_000000_sample04.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bb68c0dc2138c6668134228f39c0ec9de81a14344ae262fdde259b9798da9387 +size 2714886 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_4_step_002000.png b/output/lora_rank_128_mlp_H200/step_vis/test_4_step_002000.png new file mode 100644 index 0000000000000000000000000000000000000000..055acce63a3826a28d44c8f844aed5378687e516 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_4_step_002000.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:862316a3dab762f8f897933e7714335374145726fe4384cd8256171b7bdebf33 +size 1800970 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_4_step_002000_sample00.png b/output/lora_rank_128_mlp_H200/step_vis/test_4_step_002000_sample00.png new file mode 100644 index 0000000000000000000000000000000000000000..506ea772fa8c84a3585a412f5117429b49587a73 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_4_step_002000_sample00.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1956f6797557da43de03260caa1b8cfd620bc51382a28d2fe2ff06aa6f4704dc +size 1969967 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_4_step_002000_sample01.png b/output/lora_rank_128_mlp_H200/step_vis/test_4_step_002000_sample01.png new file mode 100644 index 0000000000000000000000000000000000000000..c9418c2c7d2b028ee2cb8feb72da1ac84b072def --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_4_step_002000_sample01.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0bf5b7fdd5191a0d84f6e3e79bd70d5f0e0c659b9ec897aa251c62e5df6e90a7 +size 1865616 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_4_step_002000_sample02.png b/output/lora_rank_128_mlp_H200/step_vis/test_4_step_002000_sample02.png new file mode 100644 index 0000000000000000000000000000000000000000..a32302defbe06ed7beaba486b6144fa9c5834b0e --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_4_step_002000_sample02.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b5abfa85efc1b129b10e1560694af57937c48fb75d3f834a1d28715c961ffd69 +size 1891708 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_4_step_002000_sample03.png b/output/lora_rank_128_mlp_H200/step_vis/test_4_step_002000_sample03.png new file mode 100644 index 0000000000000000000000000000000000000000..e4826b5635398fe449ce48500253ec1a7e48bf3f --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_4_step_002000_sample03.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8ff107dbd097126dbec7e0ab22e658a33c48515a0c45036c25cba9762f6b4496 +size 1774049 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_4_step_002000_sample04.png b/output/lora_rank_128_mlp_H200/step_vis/test_4_step_002000_sample04.png new file mode 100644 index 0000000000000000000000000000000000000000..53d287c192f222bfafc992c85bd994c8984d9d80 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_4_step_002000_sample04.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:784f6c621c38c6df0441b6c91adaed7e6b87e5644569563d16738fe1fdda0a65 +size 1895020 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_4_step_004000.png b/output/lora_rank_128_mlp_H200/step_vis/test_4_step_004000.png new file mode 100644 index 0000000000000000000000000000000000000000..9dc38dd7ce3d6b704299f893c384e008faa4ded9 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_4_step_004000.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e275eefa8ee5d79e4ef671e7df1debf76c7f76eba5b3bcf5e340ac7464df51f4 +size 1801667 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_4_step_004000_sample00.png b/output/lora_rank_128_mlp_H200/step_vis/test_4_step_004000_sample00.png new file mode 100644 index 0000000000000000000000000000000000000000..a0f2e967ce9142ed77693a0abaac51d085a57660 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_4_step_004000_sample00.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f221aee17476357ce7fdfe9a9eeb3447d4008d1fd9ad501b388f49aab10dd5c0 +size 1865967 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_4_step_004000_sample01.png b/output/lora_rank_128_mlp_H200/step_vis/test_4_step_004000_sample01.png new file mode 100644 index 0000000000000000000000000000000000000000..bae74a8b72683c34d145e2152223679050c6c9bc --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_4_step_004000_sample01.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f7b8797d4c426efd74978a088757f0ad11d5b5b49bfb7a6cf4252e8249f0987f +size 1825302 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_4_step_004000_sample02.png b/output/lora_rank_128_mlp_H200/step_vis/test_4_step_004000_sample02.png new file mode 100644 index 0000000000000000000000000000000000000000..b026d550afcbb201c7ad08a7dba487cc46d94d98 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_4_step_004000_sample02.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c1787437458fde943e0772c70f1cae66f379ee5e6f297de61522ce2d61b0ed2e +size 2009242 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_4_step_004000_sample03.png b/output/lora_rank_128_mlp_H200/step_vis/test_4_step_004000_sample03.png new file mode 100644 index 0000000000000000000000000000000000000000..3c7f896784e9a5647c285b8333cd12da54374b16 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_4_step_004000_sample03.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0aacbcff8f5fe0cad23f9c9457cd6f214021b44beaf4fb29cd864392f4478ba6 +size 1736434 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_4_step_004000_sample04.png b/output/lora_rank_128_mlp_H200/step_vis/test_4_step_004000_sample04.png new file mode 100644 index 0000000000000000000000000000000000000000..36ce2d1db46b6833437e2cea20399c752b0614e9 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_4_step_004000_sample04.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:05ad3892e7195eef818c291ac45e8a9512e28aafae95bf35c8ab946ccaefcfca +size 2046296 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_4_step_006000.png b/output/lora_rank_128_mlp_H200/step_vis/test_4_step_006000.png new file mode 100644 index 0000000000000000000000000000000000000000..c357d86f56f1137d51da34705f304c5d2bc5c8be --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_4_step_006000.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e8119804ffd3f70a20da4d9e49d2ea5a00a47918f422f502a0133b6a088868d7 +size 1806298 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_4_step_006000_sample00.png b/output/lora_rank_128_mlp_H200/step_vis/test_4_step_006000_sample00.png new file mode 100644 index 0000000000000000000000000000000000000000..a8681a830d68cdca025b0220bcd3d5622bfc13db --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_4_step_006000_sample00.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2bb6b9c8c0517430c0e6003ca069c0cb7b9b7b98e62dc3b6c2bdd16972ed6c83 +size 1948520 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_4_step_006000_sample01.png b/output/lora_rank_128_mlp_H200/step_vis/test_4_step_006000_sample01.png new file mode 100644 index 0000000000000000000000000000000000000000..6ca0aad7873a8418d8cd61b658c946974ba741f7 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_4_step_006000_sample01.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a334e8aa397617f044140357eed67cf5f0b6e41dcedf8f142dfdc7ec3138482f +size 1965161 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_4_step_006000_sample02.png b/output/lora_rank_128_mlp_H200/step_vis/test_4_step_006000_sample02.png new file mode 100644 index 0000000000000000000000000000000000000000..3d955e128e6d61321ec3092f23b50cc43c3a17d2 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_4_step_006000_sample02.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7a6b4bd8fa09b0b63db7cbee2fac541113ffc314f3c4ecf1cb10b66598f8bed3 +size 1941152 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_4_step_006000_sample03.png b/output/lora_rank_128_mlp_H200/step_vis/test_4_step_006000_sample03.png new file mode 100644 index 0000000000000000000000000000000000000000..facfa812c69bc7787c41aeba11b8b90978856ecc --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_4_step_006000_sample03.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:51e18134a41da4afcc95767c259582796baf9246c1ef84b5594dc659066fb305 +size 1725094 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_4_step_006000_sample04.png b/output/lora_rank_128_mlp_H200/step_vis/test_4_step_006000_sample04.png new file mode 100644 index 0000000000000000000000000000000000000000..cb1852c83b62f394236c65a7d445a40c3a8dc9bf --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_4_step_006000_sample04.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ae97b76af1d6880b354da4e8cdedfd59702bc83aeb60cc56fd7810ad954afb18 +size 1839389 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_4_step_008000.png b/output/lora_rank_128_mlp_H200/step_vis/test_4_step_008000.png new file mode 100644 index 0000000000000000000000000000000000000000..59370476bd2cc2cef96ccc8df87f13a9b758c546 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_4_step_008000.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e4e07af13c2c525eea1ec2decc606cb44f831a24e65276b43d34f6f53cc13d84 +size 1800932 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_4_step_008000_sample00.png b/output/lora_rank_128_mlp_H200/step_vis/test_4_step_008000_sample00.png new file mode 100644 index 0000000000000000000000000000000000000000..2468e250dd26ff9575a8eb84d36a0a15e52a79b6 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_4_step_008000_sample00.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cd01410e2196630e9860a6b0add147f4273977504decce2afa771a387b5b3194 +size 1877579 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_4_step_008000_sample01.png b/output/lora_rank_128_mlp_H200/step_vis/test_4_step_008000_sample01.png new file mode 100644 index 0000000000000000000000000000000000000000..196b886c55514f55a028139eadfbe4e8b8227539 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_4_step_008000_sample01.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f3f0e66fac2ad013240ce85c51c168c2a582dd5f044efcf4eefde5f7ccf28493 +size 1843199 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_4_step_008000_sample02.png b/output/lora_rank_128_mlp_H200/step_vis/test_4_step_008000_sample02.png new file mode 100644 index 0000000000000000000000000000000000000000..4181f1bcd63cc9cb1a7d8458086b7bf2b03fc197 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_4_step_008000_sample02.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:271470e785b37d3b90030f0e5e1bcfebaa393f6c17a5e25e6f673381ff34831c +size 1817510 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_4_step_008000_sample03.png b/output/lora_rank_128_mlp_H200/step_vis/test_4_step_008000_sample03.png new file mode 100644 index 0000000000000000000000000000000000000000..506f2aadf01d59b03163b339f651889a4f7b093a --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_4_step_008000_sample03.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4291e9e4630747b9e3a981c3deae7f42e6a366e533d17c1001f07b053afa616d +size 1757870 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_4_step_008000_sample04.png b/output/lora_rank_128_mlp_H200/step_vis/test_4_step_008000_sample04.png new file mode 100644 index 0000000000000000000000000000000000000000..2685a61d3412accb407d58b402cd39d5e7f688db --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_4_step_008000_sample04.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4a7e30d60451522f47049ca16601300d667e847f51135628210473754dfc1c7f +size 1865943 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_4_step_010000.png b/output/lora_rank_128_mlp_H200/step_vis/test_4_step_010000.png new file mode 100644 index 0000000000000000000000000000000000000000..2531498c4e1ffb7ef5b5e8a0be2c6ddfdbc59f71 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_4_step_010000.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b030a69828e640771c8478daf8015bc8b71fa28234bd7917117824eb644f2f8a +size 1783949 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_4_step_010000_sample00.png b/output/lora_rank_128_mlp_H200/step_vis/test_4_step_010000_sample00.png new file mode 100644 index 0000000000000000000000000000000000000000..16678637b806ee0492a8f5527b3eb34d2c5a9d4a --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_4_step_010000_sample00.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fc667ce4858395f9a4f451e6ceeb007748027e09e80ff0d924448692c0d702ef +size 1955480 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_4_step_010000_sample01.png b/output/lora_rank_128_mlp_H200/step_vis/test_4_step_010000_sample01.png new file mode 100644 index 0000000000000000000000000000000000000000..1a618e3380bf4eb23c19a8edbce9e8d76d2efc76 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_4_step_010000_sample01.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:18fe4245e6f4ca40b6b8a307f147653b0c48fe0cc5f30fb12b1c7e8a66aea152 +size 1929931 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_4_step_010000_sample02.png b/output/lora_rank_128_mlp_H200/step_vis/test_4_step_010000_sample02.png new file mode 100644 index 0000000000000000000000000000000000000000..40dd54c2ad7e30b557659d7586a5b2daeb9f6106 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_4_step_010000_sample02.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5027e4cc56bde4503c119112e4064ee742fad227d38999919650b726b27fba6e +size 1871848 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_4_step_010000_sample03.png b/output/lora_rank_128_mlp_H200/step_vis/test_4_step_010000_sample03.png new file mode 100644 index 0000000000000000000000000000000000000000..0a15e942cb16e3429bdeac2968acbe4b770c8f9b --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_4_step_010000_sample03.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1595ce2d3df0d95a2d5c8983ade641161c2e65ca34e4d4ac6faf72c97ce16dac +size 1703486 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_4_step_010000_sample04.png b/output/lora_rank_128_mlp_H200/step_vis/test_4_step_010000_sample04.png new file mode 100644 index 0000000000000000000000000000000000000000..eda1a1289759374bbc3bed5c97618af287320731 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_4_step_010000_sample04.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ffab66fc68750a16ce2126bebecf5004805e835a62432fbbee6e4714af3e4b71 +size 1870724 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_4_step_011999.png b/output/lora_rank_128_mlp_H200/step_vis/test_4_step_011999.png new file mode 100644 index 0000000000000000000000000000000000000000..0226432f5cd13a7b5c604d10690e93c8b4523a95 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_4_step_011999.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:62f95261e79b14af56668eed0d4b46bd41c26a1fb2b2b4d53de2e745f3b03eca +size 1777399 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_4_step_011999_sample00.png b/output/lora_rank_128_mlp_H200/step_vis/test_4_step_011999_sample00.png new file mode 100644 index 0000000000000000000000000000000000000000..9aa1470d31be30b618c7d5020d0af1d6081022e5 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_4_step_011999_sample00.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:236d9aa696b0d56f09c78390287bf1b8d4ec968d6486a4cc6181bfcfb2eebc2f +size 1889232 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_4_step_011999_sample01.png b/output/lora_rank_128_mlp_H200/step_vis/test_4_step_011999_sample01.png new file mode 100644 index 0000000000000000000000000000000000000000..147ce96c01654520f857bfbe1bc653973d3061d2 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_4_step_011999_sample01.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d1eafc2c8b0ea7c997e935600d73b7d7744427c79e70a445b0d29a77d19f9276 +size 1806766 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_4_step_011999_sample02.png b/output/lora_rank_128_mlp_H200/step_vis/test_4_step_011999_sample02.png new file mode 100644 index 0000000000000000000000000000000000000000..c2dd8e436d2f7e0921d54d5a9b2b642bd2cc0a38 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_4_step_011999_sample02.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:69865fba3892e012785c4cefb40db0f616474015fd46c883d0dba90da5a31831 +size 1854329 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_4_step_011999_sample03.png b/output/lora_rank_128_mlp_H200/step_vis/test_4_step_011999_sample03.png new file mode 100644 index 0000000000000000000000000000000000000000..e9a88f61c25b71dbd6bca766bfbbb54dd6da68c2 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_4_step_011999_sample03.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:acb458c2bfee94f9fdf13e2ea3b6b214ca418e208e7528b4d82faf61398a690f +size 1729458 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_4_step_011999_sample04.png b/output/lora_rank_128_mlp_H200/step_vis/test_4_step_011999_sample04.png new file mode 100644 index 0000000000000000000000000000000000000000..0ef4a0f194421ed080f84e3606b6908580d90372 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_4_step_011999_sample04.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:65fae3428787df61c4e80c41ff21391912612704ca1235c437a9cc8876bf655b +size 1815044 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_4_step_013999.png b/output/lora_rank_128_mlp_H200/step_vis/test_4_step_013999.png new file mode 100644 index 0000000000000000000000000000000000000000..71ebe21c1b75564b149dce7951045a0431f6eac7 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_4_step_013999.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a739ddbc2fb728aec8abd938eb209b82e7c453d75fb480ff20b526e261d27d57 +size 1804606 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_4_step_013999_sample00.png b/output/lora_rank_128_mlp_H200/step_vis/test_4_step_013999_sample00.png new file mode 100644 index 0000000000000000000000000000000000000000..5bdf4aa8908b4f004ba597e19ea5f643e9ba1106 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_4_step_013999_sample00.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:260bca5f4b6e526ea785568b208fefbb492b068a37b40b57268f20153ef5c371 +size 1967551 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_4_step_013999_sample01.png b/output/lora_rank_128_mlp_H200/step_vis/test_4_step_013999_sample01.png new file mode 100644 index 0000000000000000000000000000000000000000..ef67e54e3f5d979fd977343962807c289d5eefcb --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_4_step_013999_sample01.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f9603c9aa7456c1de7c6a13735935f18db5fd6a011cd73274163b40e61970917 +size 1933372 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_4_step_013999_sample02.png b/output/lora_rank_128_mlp_H200/step_vis/test_4_step_013999_sample02.png new file mode 100644 index 0000000000000000000000000000000000000000..2ffd491999d2759200893911af6fb82a508bb63c --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_4_step_013999_sample02.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6c4d4c813fb2ba01dd1c02a0af391c9fc89e27fe59d7ce2bfbfb8906cf705ebd +size 1959367 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_4_step_013999_sample03.png b/output/lora_rank_128_mlp_H200/step_vis/test_4_step_013999_sample03.png new file mode 100644 index 0000000000000000000000000000000000000000..ab69e1627413d7dad73c4d826887c35ef0cb068f --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_4_step_013999_sample03.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:af53063814c92ea839d89d4fd9f8319081564abed1c4339c51a2e6baa64400a9 +size 1772996 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_4_step_013999_sample04.png b/output/lora_rank_128_mlp_H200/step_vis/test_4_step_013999_sample04.png new file mode 100644 index 0000000000000000000000000000000000000000..0a78d2a3815b876d91ba9a301613bd74cc053a7d --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_4_step_013999_sample04.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9cfbab6671ab48d9f003c54ac804d32652c7ea707e706de144a40276561b1646 +size 1911425 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_4_step_015999.png b/output/lora_rank_128_mlp_H200/step_vis/test_4_step_015999.png new file mode 100644 index 0000000000000000000000000000000000000000..759567f8d0f2280e9b4e933adcc8f58330b8b5c8 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_4_step_015999.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f13a355ad11ed3bbc968389310f18072c42a9b6b48d6d79e092d92e032cb024d +size 1779464 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_4_step_015999_sample00.png b/output/lora_rank_128_mlp_H200/step_vis/test_4_step_015999_sample00.png new file mode 100644 index 0000000000000000000000000000000000000000..cc7a55d5e734471d2a7886c84f961ca3d5411b36 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_4_step_015999_sample00.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e65ba497cb70e3624936a4244dbdaae9d024c5492acb610663851d8af48ff7f4 +size 1997518 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_4_step_015999_sample01.png b/output/lora_rank_128_mlp_H200/step_vis/test_4_step_015999_sample01.png new file mode 100644 index 0000000000000000000000000000000000000000..658475ae822f27951ed88d688c6ab59bbca5f7a3 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_4_step_015999_sample01.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:14d2cad0a6d621bb0b6a322ddf434d158cff012d5e2329517199d0a032cec354 +size 1904284 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_4_step_015999_sample02.png b/output/lora_rank_128_mlp_H200/step_vis/test_4_step_015999_sample02.png new file mode 100644 index 0000000000000000000000000000000000000000..52147d24e1064bcd1eb5b85f09ae81387db09231 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_4_step_015999_sample02.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:228403b1bfdebbcfac3a7d4ef2a21289eabb3cf90034d0f5677a34e8f6e1c34d +size 1811452 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_4_step_015999_sample03.png b/output/lora_rank_128_mlp_H200/step_vis/test_4_step_015999_sample03.png new file mode 100644 index 0000000000000000000000000000000000000000..58ab8de9111bf0b35b5b828feb422bd589724925 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_4_step_015999_sample03.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b647480e966950e70b17faef2deb43dea16cc51b5e7ad4f348ac791cc6e1d5fa +size 1671073 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_4_step_015999_sample04.png b/output/lora_rank_128_mlp_H200/step_vis/test_4_step_015999_sample04.png new file mode 100644 index 0000000000000000000000000000000000000000..3b931b02c279d7c8280fca49f6a393940da6af19 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_4_step_015999_sample04.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f8f84af9525d885d03d0ee423628017817b5986f6d193307336ddc1c69bba0e4 +size 1746920 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_4_step_017999.png b/output/lora_rank_128_mlp_H200/step_vis/test_4_step_017999.png new file mode 100644 index 0000000000000000000000000000000000000000..955778f86ecbd10bf6e23c3c28423f202c34b0c7 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_4_step_017999.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5a1de1f46b918835eec99c6a10dc6af23cadfa8c7694397491438a4bd85f1e62 +size 1782366 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_4_step_017999_sample00.png b/output/lora_rank_128_mlp_H200/step_vis/test_4_step_017999_sample00.png new file mode 100644 index 0000000000000000000000000000000000000000..47e0167580282a9d75a4e6dcb9887043471f282e --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_4_step_017999_sample00.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e6c96f53969c7ae7e0a7121d906999fc1d31886f205223c824ac457c15f290d4 +size 1950729 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_4_step_017999_sample01.png b/output/lora_rank_128_mlp_H200/step_vis/test_4_step_017999_sample01.png new file mode 100644 index 0000000000000000000000000000000000000000..6f6055dd219a5793047312fd36f06388c66a5a6e --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_4_step_017999_sample01.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4be1d5036ed1c2768a532e0306c13be89f533b40533723ade05bf7e718c27213 +size 1909248 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_4_step_017999_sample02.png b/output/lora_rank_128_mlp_H200/step_vis/test_4_step_017999_sample02.png new file mode 100644 index 0000000000000000000000000000000000000000..3c20585ca61ca9ebfac58292f8e5c9bf11767340 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_4_step_017999_sample02.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:23cf785054221653a57de2bd1368d6ae2a6506da03c33d901f073a4e471fde92 +size 1902068 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_4_step_017999_sample03.png b/output/lora_rank_128_mlp_H200/step_vis/test_4_step_017999_sample03.png new file mode 100644 index 0000000000000000000000000000000000000000..f16989e2b8f984cd90ad816be0c9304b5b793599 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_4_step_017999_sample03.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c855d3860a30111ac612a7d0dbcc25833da7678718e80ffaa7653cf4f167a07d +size 1654312 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_4_step_017999_sample04.png b/output/lora_rank_128_mlp_H200/step_vis/test_4_step_017999_sample04.png new file mode 100644 index 0000000000000000000000000000000000000000..416a6b839a69f7fcbd6b592ebdcc4ed96d1ade7c --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_4_step_017999_sample04.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3f1760cfdb328161d9103e0b54d09625b0b24032aa17fd25a890d5e269fce9dc +size 1872222 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_4_step_019999.png b/output/lora_rank_128_mlp_H200/step_vis/test_4_step_019999.png new file mode 100644 index 0000000000000000000000000000000000000000..dac01042a0e726b63f530cc6e3e9597a42281423 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_4_step_019999.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:674dd7ec3e5e9bf5c6f2ff971ff52b46a71ce3dd219ea38e8c3f7e42ccf50fad +size 1787710 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_4_step_019999_sample00.png b/output/lora_rank_128_mlp_H200/step_vis/test_4_step_019999_sample00.png new file mode 100644 index 0000000000000000000000000000000000000000..dc74f9018ca189733626d9f204a92fcc0fb40417 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_4_step_019999_sample00.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:155f869ea99c2643cfb4d802317e752f0c9de189f3b9b75960be5433cd758583 +size 1816831 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_4_step_019999_sample01.png b/output/lora_rank_128_mlp_H200/step_vis/test_4_step_019999_sample01.png new file mode 100644 index 0000000000000000000000000000000000000000..fa8634c82ee73a96ece99029d3c7c93aab65d210 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_4_step_019999_sample01.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bd0897a7d0928320aec5906bf633e4301e4d4dac14d07a9a33351164f07aa5da +size 1885991 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_4_step_019999_sample02.png b/output/lora_rank_128_mlp_H200/step_vis/test_4_step_019999_sample02.png new file mode 100644 index 0000000000000000000000000000000000000000..e1aa6bf30af6e4243a89defc9944b442c540f393 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_4_step_019999_sample02.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7e410c316af1b6e7316f609da0da9237a03fed6fb92bb8edb4fe6ff500f933e6 +size 1900930 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_4_step_019999_sample03.png b/output/lora_rank_128_mlp_H200/step_vis/test_4_step_019999_sample03.png new file mode 100644 index 0000000000000000000000000000000000000000..50df38428423214b1520ef65a2b4035010cbb614 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_4_step_019999_sample03.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:66062f586000493ab8c2d821d42683512eb941e791a406c3e22e95b49a17a4b6 +size 1754328 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_4_step_019999_sample04.png b/output/lora_rank_128_mlp_H200/step_vis/test_4_step_019999_sample04.png new file mode 100644 index 0000000000000000000000000000000000000000..27ea32d1c828ba921f6f84ab4ce481b2f2a9d4f8 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_4_step_019999_sample04.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:18615ef69c6cb84778b11bfe8b4c56c48f7c1c49d1a8d40907ffd80b06cb27c7 +size 1876725 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_5_step_000000.png b/output/lora_rank_128_mlp_H200/step_vis/test_5_step_000000.png new file mode 100644 index 0000000000000000000000000000000000000000..af720751e20e0230dedb782b110448255b0de61e --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_5_step_000000.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e63e22a76638665eec7ceed94d94cdb9b1af645656266fff21ae6d15212b300c +size 2075053 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_5_step_000000_sample00.png b/output/lora_rank_128_mlp_H200/step_vis/test_5_step_000000_sample00.png new file mode 100644 index 0000000000000000000000000000000000000000..b1715baffc910cc78fdc44733aa9530a98ed4214 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_5_step_000000_sample00.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7087c40e6cc7cabc5e4a94d8c452d774857d989f421fb579edd986e46b3a6942 +size 2777032 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_5_step_000000_sample01.png b/output/lora_rank_128_mlp_H200/step_vis/test_5_step_000000_sample01.png new file mode 100644 index 0000000000000000000000000000000000000000..dbe6116bd442f6425758f8c722174d51505e17a0 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_5_step_000000_sample01.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4eb03b1f2edc76c0042ddfd8d6cb05a245c16af880bc2bac356cb19a9f2a7a53 +size 2783896 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_5_step_000000_sample02.png b/output/lora_rank_128_mlp_H200/step_vis/test_5_step_000000_sample02.png new file mode 100644 index 0000000000000000000000000000000000000000..985bddf03617ca380ff02f0347d233e640e1fa15 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_5_step_000000_sample02.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2e5891cd651d87a8e7540a7de849d7f624577a0bba2df167b3cb99b4e3dd7724 +size 2719870 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_5_step_000000_sample03.png b/output/lora_rank_128_mlp_H200/step_vis/test_5_step_000000_sample03.png new file mode 100644 index 0000000000000000000000000000000000000000..a1cc341b1b54d0c8e30d98fc50927fe70d2c0fa6 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_5_step_000000_sample03.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bc2c6bc4f36399a49dac9042078c00a948a435d41b7390fc025c972f13e67ecf +size 2743587 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_5_step_000000_sample04.png b/output/lora_rank_128_mlp_H200/step_vis/test_5_step_000000_sample04.png new file mode 100644 index 0000000000000000000000000000000000000000..84f4a3ecb4a5fe7f0a6aca661f36299719cf7f43 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_5_step_000000_sample04.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:162d5df7cadca065b4ef723fc16628b485ccc987cc27e39ac04c03e6b4e22a40 +size 2722888 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_5_step_002000.png b/output/lora_rank_128_mlp_H200/step_vis/test_5_step_002000.png new file mode 100644 index 0000000000000000000000000000000000000000..f3ed59916f1c261e43b8e3ebf461aa54562c28e2 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_5_step_002000.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c03a40dab1a55e5da126d81001b03e19aeb2b838a7f83b0297afc651ce43935c +size 1882126 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_5_step_002000_sample00.png b/output/lora_rank_128_mlp_H200/step_vis/test_5_step_002000_sample00.png new file mode 100644 index 0000000000000000000000000000000000000000..1b1fee10e15cf3cbca1487f5fe036c139124b8a9 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_5_step_002000_sample00.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:741e87ca0eeb2d45a8b5aaa22dda7ac0ac3d3f8063d02ada380558d11b665010 +size 1640659 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_5_step_002000_sample01.png b/output/lora_rank_128_mlp_H200/step_vis/test_5_step_002000_sample01.png new file mode 100644 index 0000000000000000000000000000000000000000..543a675aeab4feab5ecf28fe2ebcced30247a03c --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_5_step_002000_sample01.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e8a0f54b03eabfe4de949e512f3be61c5ca06d927ef9a3561e777235c530e34e +size 1801131 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_5_step_002000_sample02.png b/output/lora_rank_128_mlp_H200/step_vis/test_5_step_002000_sample02.png new file mode 100644 index 0000000000000000000000000000000000000000..aa8732911b5fe12dffe33331c7434047fe5fcea7 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_5_step_002000_sample02.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:387403fcb0038b27f4797a4f42ff3de5fdc3318a23bfa4a6a738557a42b854f0 +size 1820240 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_5_step_002000_sample03.png b/output/lora_rank_128_mlp_H200/step_vis/test_5_step_002000_sample03.png new file mode 100644 index 0000000000000000000000000000000000000000..24101c7ec2fa6fad5cac43c5223f2c63605f22b4 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_5_step_002000_sample03.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:915d8d6fccf865ac5690609f6a287a088dd3e763a32bf9b53a0afc2e0272aace +size 1990188 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_5_step_002000_sample04.png b/output/lora_rank_128_mlp_H200/step_vis/test_5_step_002000_sample04.png new file mode 100644 index 0000000000000000000000000000000000000000..15c6219716c226469fbda8f4869ca145fd26366b --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_5_step_002000_sample04.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:872f1655d4ef27574e910cc48b74ddf96bf56b1bfd325182feb5200ddbbf5887 +size 2138211 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_5_step_004000.png b/output/lora_rank_128_mlp_H200/step_vis/test_5_step_004000.png new file mode 100644 index 0000000000000000000000000000000000000000..3ef2ecf1bbfac7de3183aac5e03a4840bb1d5a9c --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_5_step_004000.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3bde1149bc219640e68ed9cad7f15d7a7a4e867181308dbe7bf0746cdd3f187f +size 1889670 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_5_step_004000_sample00.png b/output/lora_rank_128_mlp_H200/step_vis/test_5_step_004000_sample00.png new file mode 100644 index 0000000000000000000000000000000000000000..a45e46f8c83864824e23c75c11af21715a76bd2a --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_5_step_004000_sample00.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:328cf0834f648a9fb124a37b10cb6a2ab895c75e1fa286b5d777b0bec9d5c9fc +size 1886114 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_5_step_004000_sample01.png b/output/lora_rank_128_mlp_H200/step_vis/test_5_step_004000_sample01.png new file mode 100644 index 0000000000000000000000000000000000000000..8b3ee53bf1e11b286229b0efadc5869daad1b1f3 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_5_step_004000_sample01.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9ce3007a187f9fe5619208a8cd099dc7c17a6d6ca21885bc8e61d38492019d59 +size 1892045 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_5_step_004000_sample02.png b/output/lora_rank_128_mlp_H200/step_vis/test_5_step_004000_sample02.png new file mode 100644 index 0000000000000000000000000000000000000000..7621214bbfb0d1ea525ec24fc840e8da5504f424 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_5_step_004000_sample02.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a579c7923042d8cf8de0f9e5cb0ece685b580581f0a9f3baf4bc65ed72f26d9c +size 1794018 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_5_step_004000_sample03.png b/output/lora_rank_128_mlp_H200/step_vis/test_5_step_004000_sample03.png new file mode 100644 index 0000000000000000000000000000000000000000..91dda7c3c717b5c09523f6fd47cbf576868f06e6 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_5_step_004000_sample03.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2ae8a54d7ff4256b8edb5a9deaa2b0efeb6e7946d6b222e9be8a309187c33605 +size 2061845 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_5_step_004000_sample04.png b/output/lora_rank_128_mlp_H200/step_vis/test_5_step_004000_sample04.png new file mode 100644 index 0000000000000000000000000000000000000000..ea31ee6a4479ebb64d51196cd8be8a3f225a80e5 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_5_step_004000_sample04.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:53a093497d3b358dd07eb7efad5a4079dec613121ca2b27fd2717f3b595a382f +size 2014732 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_5_step_006000.png b/output/lora_rank_128_mlp_H200/step_vis/test_5_step_006000.png new file mode 100644 index 0000000000000000000000000000000000000000..2c4a4f310b857bb42a4f3c43a5a15ca15e7ddfe8 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_5_step_006000.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2137c987fdefd4dd1c42e59e1021fc0905a79c3026325c068eb6a5496b2b59c9 +size 1901918 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_5_step_006000_sample00.png b/output/lora_rank_128_mlp_H200/step_vis/test_5_step_006000_sample00.png new file mode 100644 index 0000000000000000000000000000000000000000..ed92516143eaf504c22bfad0d581035f6760c66d --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_5_step_006000_sample00.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:19cb8b0457732d30b98929b934086fcd8c9b438367aff0d4af0a35de0202706f +size 1927604 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_5_step_006000_sample01.png b/output/lora_rank_128_mlp_H200/step_vis/test_5_step_006000_sample01.png new file mode 100644 index 0000000000000000000000000000000000000000..8adbd97f758a9ace80e931787732ac4cbbfbcfe7 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_5_step_006000_sample01.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e23c204c7a773a5c05b043ffe63dc4320881af49882707ca7ea74041ac87d2f4 +size 2066587 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_5_step_006000_sample02.png b/output/lora_rank_128_mlp_H200/step_vis/test_5_step_006000_sample02.png new file mode 100644 index 0000000000000000000000000000000000000000..9e519f9fb416680988d89e24e072aa55f02a0225 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_5_step_006000_sample02.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6141497781c68930efe46c49f13c89fc98eae4b7443f35698adca10e9402e823 +size 1713015 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_5_step_006000_sample03.png b/output/lora_rank_128_mlp_H200/step_vis/test_5_step_006000_sample03.png new file mode 100644 index 0000000000000000000000000000000000000000..a2607654e7dfdb64ec1dd75008a8ee76cc2e50be --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_5_step_006000_sample03.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c76fa1e411c517159b4247159aeb68e0c01430e9deb48b74aaa4931a51aa9819 +size 1912762 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_5_step_006000_sample04.png b/output/lora_rank_128_mlp_H200/step_vis/test_5_step_006000_sample04.png new file mode 100644 index 0000000000000000000000000000000000000000..686ad01385ec08876b2e199413b32d3430f1c6da --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_5_step_006000_sample04.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4a2c2aa0801f30fb3d479a24eea20f6fa49ab777999ea7d551a597bae23c825a +size 2004933 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_5_step_008000.png b/output/lora_rank_128_mlp_H200/step_vis/test_5_step_008000.png new file mode 100644 index 0000000000000000000000000000000000000000..296bf2485db3c530214fb0f2c4256ccdecb598cb --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_5_step_008000.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:151041b3784aa9148125a0811317e446bc78168fee3d18a31adc42638488df4e +size 1911697 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_5_step_008000_sample00.png b/output/lora_rank_128_mlp_H200/step_vis/test_5_step_008000_sample00.png new file mode 100644 index 0000000000000000000000000000000000000000..f0c4ff8778a1b403542d49a867c7ff337d6620b9 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_5_step_008000_sample00.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5a889b91020cc22fbbd2466e40ff1c04cc6319d927588c910e134a75a09fa31b +size 2053087 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_5_step_008000_sample01.png b/output/lora_rank_128_mlp_H200/step_vis/test_5_step_008000_sample01.png new file mode 100644 index 0000000000000000000000000000000000000000..0275049ba2711ae003678f335de9e9055c86ece5 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_5_step_008000_sample01.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bb51414d1ab6f357baf53a0990d087c032012fa7754ebc11e4ef71d36467f3ad +size 1911906 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_5_step_008000_sample02.png b/output/lora_rank_128_mlp_H200/step_vis/test_5_step_008000_sample02.png new file mode 100644 index 0000000000000000000000000000000000000000..292166fcfc8e479968ac6b0f60ae3d4273def062 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_5_step_008000_sample02.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:37792fdcfe642a057e48165d516a2e70d8c47c1ad45beef1a27aefd8f17cda01 +size 1885534 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_5_step_008000_sample03.png b/output/lora_rank_128_mlp_H200/step_vis/test_5_step_008000_sample03.png new file mode 100644 index 0000000000000000000000000000000000000000..f334872dbf9477a548b9aabf9ca13c583ed5147a --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_5_step_008000_sample03.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a970719be3283356c1d04404c6ca03564a800d4843a5124ad094fd85f68b6b9f +size 1893822 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_5_step_008000_sample04.png b/output/lora_rank_128_mlp_H200/step_vis/test_5_step_008000_sample04.png new file mode 100644 index 0000000000000000000000000000000000000000..2f4ebdc23f94824be2449c725d45077fe5cb21da --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_5_step_008000_sample04.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4e506acf7b6f794918efd738a3894883d5f415e5d6d4bce505df1e27c6ac371d +size 2021232 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_5_step_010000.png b/output/lora_rank_128_mlp_H200/step_vis/test_5_step_010000.png new file mode 100644 index 0000000000000000000000000000000000000000..05e6f968aaa9630d823ffb0d0361b61c842c464f --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_5_step_010000.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ca98655b1432bf1a4b2c33a03b9fb08d4b83476643008b63c677eaa787ce52d0 +size 1894181 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_5_step_010000_sample00.png b/output/lora_rank_128_mlp_H200/step_vis/test_5_step_010000_sample00.png new file mode 100644 index 0000000000000000000000000000000000000000..14000c474ef41365b327f29b8c513519ca256693 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_5_step_010000_sample00.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e06b6d6fd2258b64542c41aefe24310c67e30d3fe63e067055a137290fe5a5bd +size 1908007 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_5_step_010000_sample01.png b/output/lora_rank_128_mlp_H200/step_vis/test_5_step_010000_sample01.png new file mode 100644 index 0000000000000000000000000000000000000000..ae64984809579e060cc36dca24206a3725b78fc2 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_5_step_010000_sample01.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3d98ebac8442fada259f90e0cbc0cbe568855c3e362050ea54b4afbe67b2edc4 +size 1897995 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_5_step_010000_sample02.png b/output/lora_rank_128_mlp_H200/step_vis/test_5_step_010000_sample02.png new file mode 100644 index 0000000000000000000000000000000000000000..449a011d0e026e0e6bb137cfe0c9620ed3edd7c9 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_5_step_010000_sample02.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4c1a81d4fc53714ef309df826ccccf7aad56c5b405102ee574e98f9ce1ccddf4 +size 1707239 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_5_step_010000_sample03.png b/output/lora_rank_128_mlp_H200/step_vis/test_5_step_010000_sample03.png new file mode 100644 index 0000000000000000000000000000000000000000..ba78c33c701d3d7e98accdd680ec0933f97184e3 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_5_step_010000_sample03.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:15e7e2c0dc7b080c15ec957cd20c5774d86941dd56b6b4ea93fff0d95dbf6b77 +size 1988408 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_5_step_010000_sample04.png b/output/lora_rank_128_mlp_H200/step_vis/test_5_step_010000_sample04.png new file mode 100644 index 0000000000000000000000000000000000000000..e713813c19c0a4da60025a75205a53feea73df20 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_5_step_010000_sample04.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b65b6cf25afa50f9c80c75ff28f26eb2558b506721aab679edb35ddc1d4542f +size 2046470 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_5_step_011999.png b/output/lora_rank_128_mlp_H200/step_vis/test_5_step_011999.png new file mode 100644 index 0000000000000000000000000000000000000000..2d6e8c6c63842dc21fc4d4dd61f4eb36f2faf2b9 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_5_step_011999.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dbf0c790942e01a39f3ae0915bb660869f3b7f99a699f12bc213022efc6e0b78 +size 1885033 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_5_step_011999_sample00.png b/output/lora_rank_128_mlp_H200/step_vis/test_5_step_011999_sample00.png new file mode 100644 index 0000000000000000000000000000000000000000..91520eedf283f6b84c5d0ad10f6e1fda789a06c5 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_5_step_011999_sample00.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:250f157fcf5bcfa47a8647caba60aa94f039e3cbdb3a0de15eedf66d8709e5e4 +size 1979301 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_5_step_011999_sample01.png b/output/lora_rank_128_mlp_H200/step_vis/test_5_step_011999_sample01.png new file mode 100644 index 0000000000000000000000000000000000000000..bf9709f4fe2c59c0adddf231326c31741c8cc72f --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_5_step_011999_sample01.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:38425c4e45512795cdeb38f65983034ad0e761004366e118aae211bbf195ceea +size 1848118 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_5_step_011999_sample02.png b/output/lora_rank_128_mlp_H200/step_vis/test_5_step_011999_sample02.png new file mode 100644 index 0000000000000000000000000000000000000000..a4448a5f0e583dc925130cd95ec8c50cb86950b9 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_5_step_011999_sample02.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2263b8d156fb73de0d5c50496e84cb3dbc0124eaf340e8b9c6d9cd11262134b3 +size 1725138 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_5_step_011999_sample03.png b/output/lora_rank_128_mlp_H200/step_vis/test_5_step_011999_sample03.png new file mode 100644 index 0000000000000000000000000000000000000000..1c4e70ce63e7da67b26a348cabf1e0d0f8923cb6 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_5_step_011999_sample03.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8a24a8cb02040919c2b73a08e77835b4e0807d82a94a410208396f612f1fa128 +size 1952920 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_5_step_011999_sample04.png b/output/lora_rank_128_mlp_H200/step_vis/test_5_step_011999_sample04.png new file mode 100644 index 0000000000000000000000000000000000000000..c0b31b53d777ca572ae2884b3310f4e4b85a8487 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_5_step_011999_sample04.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ab6310f169172618e557bd62f94d1ad3b41c4532ae3510e1c963b4d323303c79 +size 1943925 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_5_step_013999.png b/output/lora_rank_128_mlp_H200/step_vis/test_5_step_013999.png new file mode 100644 index 0000000000000000000000000000000000000000..476b7b7132e7a828866dd406a940f8c8026c920b --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_5_step_013999.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9bf8095f321867db0458f18a586949b3b35ddabcd7610a988ce967f9c953558d +size 1898081 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_5_step_013999_sample00.png b/output/lora_rank_128_mlp_H200/step_vis/test_5_step_013999_sample00.png new file mode 100644 index 0000000000000000000000000000000000000000..b045774140ddff60f4d8af2d0891a268890d3f80 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_5_step_013999_sample00.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9bbce5574d84213e45570e89d13afc65de99339955d9b54562f1cbc208dec200 +size 1883378 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_5_step_013999_sample01.png b/output/lora_rank_128_mlp_H200/step_vis/test_5_step_013999_sample01.png new file mode 100644 index 0000000000000000000000000000000000000000..a52d950ca4e366423b0d31370f971342dca18054 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_5_step_013999_sample01.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:170200796dda28f2e147d996af171df67411add49cc9642039aa76ea538d7ee5 +size 2005481 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_5_step_013999_sample02.png b/output/lora_rank_128_mlp_H200/step_vis/test_5_step_013999_sample02.png new file mode 100644 index 0000000000000000000000000000000000000000..5cf89daa2fbec256cb88cb33e5bc25efc63a9a67 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_5_step_013999_sample02.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3872610fbe588ae067575698f554ca0acf0eb645f2a82e8243240354c27c0e95 +size 1815681 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_5_step_013999_sample03.png b/output/lora_rank_128_mlp_H200/step_vis/test_5_step_013999_sample03.png new file mode 100644 index 0000000000000000000000000000000000000000..76bfd8b49f96c37b6bca4af896017713182e5bfc --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_5_step_013999_sample03.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f62fddd798e4672b7f24cf86c54c79c5d26559e206b7ce08ea80419f3696327e +size 1899644 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_5_step_013999_sample04.png b/output/lora_rank_128_mlp_H200/step_vis/test_5_step_013999_sample04.png new file mode 100644 index 0000000000000000000000000000000000000000..056e6ebe0ae1675ae11893c597b478af80848212 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_5_step_013999_sample04.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c5b8d4e31ea2a005f530868a5071b5ada4cefab739bcbd56639a0f6846651da7 +size 1948796 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_5_step_015999.png b/output/lora_rank_128_mlp_H200/step_vis/test_5_step_015999.png new file mode 100644 index 0000000000000000000000000000000000000000..ef8e605f134f0b70479e10cf12a1669b77daf37d --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_5_step_015999.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c8f024c5000ad1d9766e94e9ea68d8720ec93397bfa60cfd0e800ab13e013117 +size 1881678 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_5_step_015999_sample00.png b/output/lora_rank_128_mlp_H200/step_vis/test_5_step_015999_sample00.png new file mode 100644 index 0000000000000000000000000000000000000000..521f645af84d9da64c7845783aa1d1b51b9dea53 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_5_step_015999_sample00.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:19339e44ad1ed16b6b8dbe9838a1af551b9956579688000bffb33825e2a35552 +size 1816872 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_5_step_015999_sample01.png b/output/lora_rank_128_mlp_H200/step_vis/test_5_step_015999_sample01.png new file mode 100644 index 0000000000000000000000000000000000000000..816407acb08dce7e994f01aa3efff28427a9dc49 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_5_step_015999_sample01.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a723f6d810b2cf71f166e792d3a62faedcebc3f541a108c545d5b2a9fc1dc3c5 +size 1950319 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_5_step_015999_sample02.png b/output/lora_rank_128_mlp_H200/step_vis/test_5_step_015999_sample02.png new file mode 100644 index 0000000000000000000000000000000000000000..f4655b3610a86f7a37f220b9e2a984f784122d7c --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_5_step_015999_sample02.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:446c9fcfd4b5f069d84f02cb995221b84c32b00915b05b9d2962bd86b6a0d3bc +size 1710735 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_5_step_015999_sample03.png b/output/lora_rank_128_mlp_H200/step_vis/test_5_step_015999_sample03.png new file mode 100644 index 0000000000000000000000000000000000000000..b44a4aa10163f8f862c718415046d92c05d17b1b --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_5_step_015999_sample03.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:da7eb4338cb047913ce62fecd3d1a59e1a61a2ccd3da0b62c83c055890360914 +size 1904765 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_5_step_015999_sample04.png b/output/lora_rank_128_mlp_H200/step_vis/test_5_step_015999_sample04.png new file mode 100644 index 0000000000000000000000000000000000000000..973ce9153e6f6b97256a5c845bc677075d87ed57 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_5_step_015999_sample04.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0f599c5c8ef6b0a9a6cc2c4e87a43210f937692a34a48cac78c94c799bcf9ba5 +size 1882219 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_5_step_017999.png b/output/lora_rank_128_mlp_H200/step_vis/test_5_step_017999.png new file mode 100644 index 0000000000000000000000000000000000000000..cd1f476b97fdd00b88483d53b8984a4b0f5ad87e --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_5_step_017999.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3aff3caf7647e182e63f79d8fd47957f2fdb7c082a1fbe0e60cb21339b1d79dc +size 1887729 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_5_step_017999_sample00.png b/output/lora_rank_128_mlp_H200/step_vis/test_5_step_017999_sample00.png new file mode 100644 index 0000000000000000000000000000000000000000..eed5bdd19cdcf708d77b9f83d49d48749c2e86a7 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_5_step_017999_sample00.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:804400ae8a2aa33086bd4273e9abf356eb0f3bd4838ecfce845fa9919716923b +size 1770216 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_5_step_017999_sample01.png b/output/lora_rank_128_mlp_H200/step_vis/test_5_step_017999_sample01.png new file mode 100644 index 0000000000000000000000000000000000000000..116db2bcad2eb6cf26f209145eb17f82a09ea3e3 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_5_step_017999_sample01.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ab1a8cb011baa23119de697d76e5fdc60c827a4f89dc7eeb28341618ab1600ac +size 1924219 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_5_step_017999_sample02.png b/output/lora_rank_128_mlp_H200/step_vis/test_5_step_017999_sample02.png new file mode 100644 index 0000000000000000000000000000000000000000..4bcd9d001a3133db9a83cc27fcedff60bf6f9202 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_5_step_017999_sample02.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9b217b77ceff6c51d15a65f73e1b38c9e3334b47d00948ae6957d53b9f76311b +size 1795521 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_5_step_017999_sample03.png b/output/lora_rank_128_mlp_H200/step_vis/test_5_step_017999_sample03.png new file mode 100644 index 0000000000000000000000000000000000000000..be6ec8c57d99530ee72bc72c569f964400bd2b6e --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_5_step_017999_sample03.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ce6fc9aeb1673603bd8a7df2a15a29205c08aca2ffdc61ecd3d5687aacb4b815 +size 2000078 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_5_step_017999_sample04.png b/output/lora_rank_128_mlp_H200/step_vis/test_5_step_017999_sample04.png new file mode 100644 index 0000000000000000000000000000000000000000..689787f2466fdcc081cbf0cbb71e44b0ab6004a7 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_5_step_017999_sample04.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be3555fec2f9aa483a85a69aa5ed4090d3898758b54049dea5e6716e243036ac +size 1899234 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_5_step_019999.png b/output/lora_rank_128_mlp_H200/step_vis/test_5_step_019999.png new file mode 100644 index 0000000000000000000000000000000000000000..46d423726c776f5b3b512777a8050705cd6e3da1 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_5_step_019999.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:569251c2b26d98a65823fbb158d39742e3aa06ae9bcb2b859cca9f2767b1eab5 +size 1902635 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_5_step_019999_sample00.png b/output/lora_rank_128_mlp_H200/step_vis/test_5_step_019999_sample00.png new file mode 100644 index 0000000000000000000000000000000000000000..51a0100a2ec2822c394daaade6765ce7cbefb976 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_5_step_019999_sample00.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6c6b9a449d1a4be73ab635fba4a6d3db1190e2157c80adc4c5f9e92743290c45 +size 2030849 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_5_step_019999_sample01.png b/output/lora_rank_128_mlp_H200/step_vis/test_5_step_019999_sample01.png new file mode 100644 index 0000000000000000000000000000000000000000..80360270a1a900902fc1be106d2aca21c1455896 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_5_step_019999_sample01.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:76bee13e7a70be904c37c0ed3006fd067735286062a687060df2b0d02d741aa1 +size 2101614 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_5_step_019999_sample02.png b/output/lora_rank_128_mlp_H200/step_vis/test_5_step_019999_sample02.png new file mode 100644 index 0000000000000000000000000000000000000000..a92f8c70b0dd7fade871d9178af2891282995438 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_5_step_019999_sample02.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f0778c0363989414dd7ff5b4352eda835ee128cefcc7e57f436540ccecd9dc78 +size 1742795 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_5_step_019999_sample03.png b/output/lora_rank_128_mlp_H200/step_vis/test_5_step_019999_sample03.png new file mode 100644 index 0000000000000000000000000000000000000000..53e8dff7da8158df973020a6cbcf08de6ed1e124 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_5_step_019999_sample03.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2ac9cecaaae4e7b3d14e324f6404e3f76f37bcee7ae0a99ea00e70563f42774d +size 1981254 diff --git a/output/lora_rank_128_mlp_H200/step_vis/test_5_step_019999_sample04.png b/output/lora_rank_128_mlp_H200/step_vis/test_5_step_019999_sample04.png new file mode 100644 index 0000000000000000000000000000000000000000..8f1083d995e5c90929eb069319844ae8680a2da4 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/test_5_step_019999_sample04.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:781455792cc0bc6fe87b4975a339e635950c3f57787b1aac05f2367c225b2772 +size 1948816 diff --git a/output/lora_rank_128_mlp_H200/step_vis/train_0_step_000000.png b/output/lora_rank_128_mlp_H200/step_vis/train_0_step_000000.png new file mode 100644 index 0000000000000000000000000000000000000000..4d08874d98a06ff19a2efc6133ff0a1385de806e --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/train_0_step_000000.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ca2db6b1bda5ab4a6b448a71ddeef3b4a20cb6fe205417ce977dd18cb55dea90 +size 2028079 diff --git a/output/lora_rank_128_mlp_H200/step_vis/train_0_step_000000_sample00.png b/output/lora_rank_128_mlp_H200/step_vis/train_0_step_000000_sample00.png new file mode 100644 index 0000000000000000000000000000000000000000..30f13b8a1c14a3da1f8086698f33eaa581887a24 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/train_0_step_000000_sample00.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b6149a128ba972c75be352b7e7cde9eb5a88eddcd03da92fccde77eaf566b3df +size 2730031 diff --git a/output/lora_rank_128_mlp_H200/step_vis/train_0_step_000000_sample01.png b/output/lora_rank_128_mlp_H200/step_vis/train_0_step_000000_sample01.png new file mode 100644 index 0000000000000000000000000000000000000000..22b67a2b93a51fc1756949d2f590dd32648a4cd7 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/train_0_step_000000_sample01.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6e42e3e7077e9f5cbe9506adaff6152d1d08c318f4fa5f12ea1cb6d38645f6cf +size 2702342 diff --git a/output/lora_rank_128_mlp_H200/step_vis/train_0_step_000000_sample02.png b/output/lora_rank_128_mlp_H200/step_vis/train_0_step_000000_sample02.png new file mode 100644 index 0000000000000000000000000000000000000000..be96201fd38021b761aac6c757cd345a4f721b9a --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/train_0_step_000000_sample02.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4115ddbe278f6744110608bec59b8cc386ac9bbadefb4e6e1c4f4183f9d3c30e +size 2736506 diff --git a/output/lora_rank_128_mlp_H200/step_vis/train_0_step_000000_sample03.png b/output/lora_rank_128_mlp_H200/step_vis/train_0_step_000000_sample03.png new file mode 100644 index 0000000000000000000000000000000000000000..e173a876105865a2bdae49afc5de59e997638531 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/train_0_step_000000_sample03.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ce0b97afd7cde387dcb6717e78ce9b3baa09a6ed799cd05e2c7aefe42f7d8c17 +size 2705638 diff --git a/output/lora_rank_128_mlp_H200/step_vis/train_0_step_000000_sample04.png b/output/lora_rank_128_mlp_H200/step_vis/train_0_step_000000_sample04.png new file mode 100644 index 0000000000000000000000000000000000000000..6f441e97fbb6f0755f360b9652ef5091292a3448 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/train_0_step_000000_sample04.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:651acedf85283243ea88b03b3ba1b3544ecb249ad65cb782d31e8c331534456c +size 2744452 diff --git a/output/lora_rank_128_mlp_H200/step_vis/train_0_step_002000.png b/output/lora_rank_128_mlp_H200/step_vis/train_0_step_002000.png new file mode 100644 index 0000000000000000000000000000000000000000..00125e7b30d11e4d6ac905ee6256c1e8960532a0 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/train_0_step_002000.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6bc83511da4770b6fa8d6ce556f2ce14646fd4ad572c1abd13c7a80b3afea592 +size 1820796 diff --git a/output/lora_rank_128_mlp_H200/step_vis/train_0_step_002000_sample00.png b/output/lora_rank_128_mlp_H200/step_vis/train_0_step_002000_sample00.png new file mode 100644 index 0000000000000000000000000000000000000000..369e10c7c73f10ba97c3d1ca67d587eaa2502a19 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/train_0_step_002000_sample00.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3a4874c69d0a8319d0f7cda162e8fd1ab72d3a357a8761330918dcdd02cd116c +size 1959109 diff --git a/output/lora_rank_128_mlp_H200/step_vis/train_0_step_002000_sample01.png b/output/lora_rank_128_mlp_H200/step_vis/train_0_step_002000_sample01.png new file mode 100644 index 0000000000000000000000000000000000000000..7df96b639a8a2460d86db6e493e0faec06918d0d --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/train_0_step_002000_sample01.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:799cf5af88e689a868c13e0aeae7ea87e293571f8785c5106820bb24572d7e7f +size 2040043 diff --git a/output/lora_rank_128_mlp_H200/step_vis/train_0_step_002000_sample02.png b/output/lora_rank_128_mlp_H200/step_vis/train_0_step_002000_sample02.png new file mode 100644 index 0000000000000000000000000000000000000000..0d4ff934558cb25eb2d21c5b8c373b5c3032009a --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/train_0_step_002000_sample02.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dcfc4ee2cbf2327ee71047897a110d03da32086f461e8139f38b28aa8c2328fb +size 1801065 diff --git a/output/lora_rank_128_mlp_H200/step_vis/train_0_step_002000_sample03.png b/output/lora_rank_128_mlp_H200/step_vis/train_0_step_002000_sample03.png new file mode 100644 index 0000000000000000000000000000000000000000..9f8359c4324568eecdc293936b9005545469a62d --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/train_0_step_002000_sample03.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ebeec44377c2084d303487c55c8f694130806a0f6696da4c097b002232a06b61 +size 1888621 diff --git a/output/lora_rank_128_mlp_H200/step_vis/train_0_step_002000_sample04.png b/output/lora_rank_128_mlp_H200/step_vis/train_0_step_002000_sample04.png new file mode 100644 index 0000000000000000000000000000000000000000..ea9af7e2cb723c06886ac75c777bc4732835c52c --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/train_0_step_002000_sample04.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:af3376903e208b060f7ca204f4fa07616aa0c7311bf3caebce5898b2e81f2328 +size 1622013 diff --git a/output/lora_rank_128_mlp_H200/step_vis/train_0_step_004000.png b/output/lora_rank_128_mlp_H200/step_vis/train_0_step_004000.png new file mode 100644 index 0000000000000000000000000000000000000000..7a2c00af9733427d072ce967e3f4d67122b75b17 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/train_0_step_004000.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7b4e805df0afcffff69deeadb109f7b45fbdcc51c69855a7265b6e370f7aac17 +size 1830666 diff --git a/output/lora_rank_128_mlp_H200/step_vis/train_0_step_004000_sample00.png b/output/lora_rank_128_mlp_H200/step_vis/train_0_step_004000_sample00.png new file mode 100644 index 0000000000000000000000000000000000000000..5d71bfbe888d3abb016d611bb18cb453868f9188 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/train_0_step_004000_sample00.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cd6f43e1d17d5d087594798a426ae09c1ddad56c4428dfedf13680738ffb59f9 +size 1740110 diff --git a/output/lora_rank_128_mlp_H200/step_vis/train_0_step_004000_sample01.png b/output/lora_rank_128_mlp_H200/step_vis/train_0_step_004000_sample01.png new file mode 100644 index 0000000000000000000000000000000000000000..73963fc4d71f8c2bb8fc813987ac8f36c419bfa7 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/train_0_step_004000_sample01.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d29e70713a206de7f4fd2328fdd24dcd646ddf47c3584a8f67620f6c91979186 +size 1960589 diff --git a/output/lora_rank_128_mlp_H200/step_vis/train_0_step_004000_sample02.png b/output/lora_rank_128_mlp_H200/step_vis/train_0_step_004000_sample02.png new file mode 100644 index 0000000000000000000000000000000000000000..6ce3554065343ee91dfef4c244e7377a62a0f7a4 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/train_0_step_004000_sample02.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:53b15e4f4b4c2ff3b98e9ee1dcac4d49c0512cdffc1bdc8188be687446e53fd9 +size 1960148 diff --git a/output/lora_rank_128_mlp_H200/step_vis/train_0_step_004000_sample03.png b/output/lora_rank_128_mlp_H200/step_vis/train_0_step_004000_sample03.png new file mode 100644 index 0000000000000000000000000000000000000000..338e549d93afcda266f0e8c3cb74d919fe88d36b --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/train_0_step_004000_sample03.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:062fc6de4927aed4e56348d191f46b4fb7ad7677e3d1e9badea8c8ae327360f7 +size 1967119 diff --git a/output/lora_rank_128_mlp_H200/step_vis/train_0_step_004000_sample04.png b/output/lora_rank_128_mlp_H200/step_vis/train_0_step_004000_sample04.png new file mode 100644 index 0000000000000000000000000000000000000000..5dfd691f530d27c239acbe6f1597b0a719f7cfae --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/train_0_step_004000_sample04.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9dcaeb9cc70483f855a7a460283198fe31b0cd0c4e33d7e6c040fb921770748b +size 1659894 diff --git a/output/lora_rank_128_mlp_H200/step_vis/train_0_step_006000.png b/output/lora_rank_128_mlp_H200/step_vis/train_0_step_006000.png new file mode 100644 index 0000000000000000000000000000000000000000..d48542154543839d30e065081ff4361e677adb06 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/train_0_step_006000.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:87067d1ff15979bcaa73588f16235df4a0fc43ab2f2c36bacf42de269534d28c +size 1820503 diff --git a/output/lora_rank_128_mlp_H200/step_vis/train_0_step_006000_sample00.png b/output/lora_rank_128_mlp_H200/step_vis/train_0_step_006000_sample00.png new file mode 100644 index 0000000000000000000000000000000000000000..646233b92620c9b3cc0427becb36939d64805729 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/train_0_step_006000_sample00.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7cb196dda5f9ea01405b68ae57be32797ae958102c8b4e5e12618a621e03119a +size 1808197 diff --git a/output/lora_rank_128_mlp_H200/step_vis/train_0_step_006000_sample01.png b/output/lora_rank_128_mlp_H200/step_vis/train_0_step_006000_sample01.png new file mode 100644 index 0000000000000000000000000000000000000000..62e8068474cc5205bf4c411896cb2d9eb9c692fd --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/train_0_step_006000_sample01.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:85e7758109b03ec540fb7c43a7a7e7b38756fde90614aa6ee996e0235bd90986 +size 1845380 diff --git a/output/lora_rank_128_mlp_H200/step_vis/train_0_step_006000_sample02.png b/output/lora_rank_128_mlp_H200/step_vis/train_0_step_006000_sample02.png new file mode 100644 index 0000000000000000000000000000000000000000..ce8b44d4ebf315e75258fc27e27d332be7f39cc8 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/train_0_step_006000_sample02.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:69e63815e62d5419f425ad733c233e05a33ccc4dd092e48279409fabbe948c8f +size 1881585 diff --git a/output/lora_rank_128_mlp_H200/step_vis/train_0_step_006000_sample03.png b/output/lora_rank_128_mlp_H200/step_vis/train_0_step_006000_sample03.png new file mode 100644 index 0000000000000000000000000000000000000000..c1337db8312c79a6c8f32f466ca4c37019526834 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/train_0_step_006000_sample03.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:87bc9081524fb0e19035563f9fdd6e1fdaf8de96da841a787077581f87bf7b90 +size 1956381 diff --git a/output/lora_rank_128_mlp_H200/step_vis/train_0_step_006000_sample04.png b/output/lora_rank_128_mlp_H200/step_vis/train_0_step_006000_sample04.png new file mode 100644 index 0000000000000000000000000000000000000000..f4c9d92a7def3f73a9461a2c40df9b3a9d38d66f --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/train_0_step_006000_sample04.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e43b70354c642c823b4f709121b5ddce582d23f81199822ac8be0fb2fbee41b6 +size 1635884 diff --git a/output/lora_rank_128_mlp_H200/step_vis/train_0_step_008000.png b/output/lora_rank_128_mlp_H200/step_vis/train_0_step_008000.png new file mode 100644 index 0000000000000000000000000000000000000000..85bd44d5d3a96003bf099789f8cf32f96e2a9ad2 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/train_0_step_008000.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c8d2d165041a470b1a60f206a1ef55c2bebe690b3a8c11bb7108f99b89aaa7bd +size 1821995 diff --git a/output/lora_rank_128_mlp_H200/step_vis/train_0_step_008000_sample00.png b/output/lora_rank_128_mlp_H200/step_vis/train_0_step_008000_sample00.png new file mode 100644 index 0000000000000000000000000000000000000000..16c549feb3f5b9bbd6a5d755bb0dea69d7c7bbce --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/train_0_step_008000_sample00.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4284600c48aa32efb62ae09a6db1a50c29502736f0a6281214c20f245e3ca08c +size 1732746 diff --git a/output/lora_rank_128_mlp_H200/step_vis/train_0_step_008000_sample01.png b/output/lora_rank_128_mlp_H200/step_vis/train_0_step_008000_sample01.png new file mode 100644 index 0000000000000000000000000000000000000000..e024aa7926038bb78d95f7553f165b44abc6931b --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/train_0_step_008000_sample01.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1d0956b186cb01d8ead23ec6cabd53db76d2123901937ee607514d4f3159efaf +size 1882753 diff --git a/output/lora_rank_128_mlp_H200/step_vis/train_0_step_008000_sample02.png b/output/lora_rank_128_mlp_H200/step_vis/train_0_step_008000_sample02.png new file mode 100644 index 0000000000000000000000000000000000000000..d99a4e9d2356e453a6bbeb5e1d4d72e86d8aad2a --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/train_0_step_008000_sample02.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1a1dbfe6c7b5d0c094d0b606c07bf3e872a2f104247cdbc7768627db750231ac +size 1779428 diff --git a/output/lora_rank_128_mlp_H200/step_vis/train_0_step_008000_sample03.png b/output/lora_rank_128_mlp_H200/step_vis/train_0_step_008000_sample03.png new file mode 100644 index 0000000000000000000000000000000000000000..dfbcdf67dea800672a995da04a910bd6641af0b1 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/train_0_step_008000_sample03.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0010597425fd53105f8067643a59b87ffc0a097c1eaf58fce973965178979ac2 +size 1920032 diff --git a/output/lora_rank_128_mlp_H200/step_vis/train_0_step_008000_sample04.png b/output/lora_rank_128_mlp_H200/step_vis/train_0_step_008000_sample04.png new file mode 100644 index 0000000000000000000000000000000000000000..fd502f7ce9080f37a41c64162fa565f105756965 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/train_0_step_008000_sample04.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2dbfd606f158569c22549ddaafa66dc1b30bbba73aed23147b888ea4f5026a9f +size 1710274 diff --git a/output/lora_rank_128_mlp_H200/step_vis/train_0_step_010000.png b/output/lora_rank_128_mlp_H200/step_vis/train_0_step_010000.png new file mode 100644 index 0000000000000000000000000000000000000000..b8d46b6490312827380cfecd9d004697d4f30a0e --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/train_0_step_010000.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a9db5eb8d6ac19d409afe13c8594e7c997366c6a7b08517a0f78aa4286f3526f +size 1814815 diff --git a/output/lora_rank_128_mlp_H200/step_vis/train_0_step_010000_sample00.png b/output/lora_rank_128_mlp_H200/step_vis/train_0_step_010000_sample00.png new file mode 100644 index 0000000000000000000000000000000000000000..ba86e05bff2af50ea6e4e96884defba0dd8d3910 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/train_0_step_010000_sample00.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:40afde2afc6907c2503df67e95d88211475df5b8613a9a63f915c7f95fb79d30 +size 1772429 diff --git a/output/lora_rank_128_mlp_H200/step_vis/train_0_step_010000_sample01.png b/output/lora_rank_128_mlp_H200/step_vis/train_0_step_010000_sample01.png new file mode 100644 index 0000000000000000000000000000000000000000..c5c6631ab5ecb800252b0a1ecdb7d646335ee1d7 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/train_0_step_010000_sample01.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:93969fd74d36cc6865b9977d0626ec1001331d98f938698d21a511a770badde7 +size 1776377 diff --git a/output/lora_rank_128_mlp_H200/step_vis/train_0_step_010000_sample02.png b/output/lora_rank_128_mlp_H200/step_vis/train_0_step_010000_sample02.png new file mode 100644 index 0000000000000000000000000000000000000000..26df0c082498ade42f8deaeb9228f6ff3838e9d4 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/train_0_step_010000_sample02.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b18fe0dbd892db084ab4483f31f881fa812f91dc382a83086fd51586b52fdb2 +size 1820139 diff --git a/output/lora_rank_128_mlp_H200/step_vis/train_0_step_010000_sample03.png b/output/lora_rank_128_mlp_H200/step_vis/train_0_step_010000_sample03.png new file mode 100644 index 0000000000000000000000000000000000000000..5c2b8c170794cecdd301002c617f3e372821eee5 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/train_0_step_010000_sample03.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a114611fb9e5ee5489976cca3e7dc79f56147f58b875edb75307de644d9d3886 +size 1967333 diff --git a/output/lora_rank_128_mlp_H200/step_vis/train_0_step_010000_sample04.png b/output/lora_rank_128_mlp_H200/step_vis/train_0_step_010000_sample04.png new file mode 100644 index 0000000000000000000000000000000000000000..6e1d514bda8dfcf3c62bac5e0c5731605c26f089 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/train_0_step_010000_sample04.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:82416271287d5dcd8fc3c07283afcdd8aee4c9d3e54e3543d27cdb09ec59347e +size 1653551 diff --git a/output/lora_rank_128_mlp_H200/step_vis/train_0_step_011999.png b/output/lora_rank_128_mlp_H200/step_vis/train_0_step_011999.png new file mode 100644 index 0000000000000000000000000000000000000000..9e46b599cab20550bde0bc7efc3eb9b6d13cce75 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/train_0_step_011999.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:558299a811c0c6fb99c34762d485b3869fae4dffbbea8d035993ed629ebf08be +size 1805518 diff --git a/output/lora_rank_128_mlp_H200/step_vis/train_0_step_011999_sample00.png b/output/lora_rank_128_mlp_H200/step_vis/train_0_step_011999_sample00.png new file mode 100644 index 0000000000000000000000000000000000000000..4b486c4c17b11629ef9648decef4fcdb401977fc --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/train_0_step_011999_sample00.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bd74d4cca891b152a9a3400cdbfaab488f24670c0932549d011b983291d06eda +size 1672520 diff --git a/output/lora_rank_128_mlp_H200/step_vis/train_0_step_011999_sample01.png b/output/lora_rank_128_mlp_H200/step_vis/train_0_step_011999_sample01.png new file mode 100644 index 0000000000000000000000000000000000000000..f17aee5ec211d89a585856aaafaadf98d74e9b46 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/train_0_step_011999_sample01.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2669921a1057e5d5787013b3328b2fb4a5c49cb8b5d1dc2448c7cdb4e4935a7b +size 1930434 diff --git a/output/lora_rank_128_mlp_H200/step_vis/train_0_step_011999_sample02.png b/output/lora_rank_128_mlp_H200/step_vis/train_0_step_011999_sample02.png new file mode 100644 index 0000000000000000000000000000000000000000..8d73fd7b095f42d70094de8428926fca70fc1534 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/train_0_step_011999_sample02.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f9ef06bf29536ddf830085d92dac9861f9a74ecf8b724a1f88859c9be71b2bda +size 1779438 diff --git a/output/lora_rank_128_mlp_H200/step_vis/train_0_step_011999_sample03.png b/output/lora_rank_128_mlp_H200/step_vis/train_0_step_011999_sample03.png new file mode 100644 index 0000000000000000000000000000000000000000..6e76c2ab48299571b6efefaa7f4f0499c76cf614 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/train_0_step_011999_sample03.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c6116b56d4fbb98ab850c4a81b8457f17e0b11ea6d0850039482c1e498f28414 +size 1831978 diff --git a/output/lora_rank_128_mlp_H200/step_vis/train_0_step_011999_sample04.png b/output/lora_rank_128_mlp_H200/step_vis/train_0_step_011999_sample04.png new file mode 100644 index 0000000000000000000000000000000000000000..e3fb004d7fa8f0e886b53cf29b1332c9e6af402e --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/train_0_step_011999_sample04.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:99d7ea1e051cab694f47bb5da4c1cd0e0211faec3d5c3c31922b0a205fd8d322 +size 1537255 diff --git a/output/lora_rank_128_mlp_H200/step_vis/train_0_step_013999.png b/output/lora_rank_128_mlp_H200/step_vis/train_0_step_013999.png new file mode 100644 index 0000000000000000000000000000000000000000..53a3c517e4ea00a20ac83ed9a303f4dc4056d776 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/train_0_step_013999.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a881af9346f0ab608276346e3bc14832d8ebd2d5a50e0ebc8e9ed13dc48484ae +size 1821178 diff --git a/output/lora_rank_128_mlp_H200/step_vis/train_0_step_013999_sample00.png b/output/lora_rank_128_mlp_H200/step_vis/train_0_step_013999_sample00.png new file mode 100644 index 0000000000000000000000000000000000000000..9067814372c471056c41afe1d9d54592cde53ea1 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/train_0_step_013999_sample00.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9e66b6015485f06442ec0483b1457aee6588945c42f54d5995d9e2320569ecdc +size 1845000 diff --git a/output/lora_rank_128_mlp_H200/step_vis/train_0_step_013999_sample01.png b/output/lora_rank_128_mlp_H200/step_vis/train_0_step_013999_sample01.png new file mode 100644 index 0000000000000000000000000000000000000000..dcb76656ee85f4468d2168c3c740f96e2209f0e5 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/train_0_step_013999_sample01.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4eb04f8ca0132960391f35d289af81dc71e3f82b68ce18b35b51f2849bda2b58 +size 1897228 diff --git a/output/lora_rank_128_mlp_H200/step_vis/train_0_step_013999_sample02.png b/output/lora_rank_128_mlp_H200/step_vis/train_0_step_013999_sample02.png new file mode 100644 index 0000000000000000000000000000000000000000..67bcf49684c38e410fb406bd52a6f917a87f1057 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/train_0_step_013999_sample02.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5ce0985f9057e3cc8c3a06851ba189074c5784cca0c70696b1d60e3ffbee9513 +size 1879980 diff --git a/output/lora_rank_128_mlp_H200/step_vis/train_0_step_013999_sample03.png b/output/lora_rank_128_mlp_H200/step_vis/train_0_step_013999_sample03.png new file mode 100644 index 0000000000000000000000000000000000000000..c54ae280d6b0b3293a059dad6ef0f775f531467e --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/train_0_step_013999_sample03.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1042b82cf62e451277928e6c8a54215d73191459b142f7807e5a7c94171702f5 +size 1927875 diff --git a/output/lora_rank_128_mlp_H200/step_vis/train_0_step_013999_sample04.png b/output/lora_rank_128_mlp_H200/step_vis/train_0_step_013999_sample04.png new file mode 100644 index 0000000000000000000000000000000000000000..dfc5bc94e3543ea8590be15ed9b57c496efd056a --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/train_0_step_013999_sample04.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:89659efe9f5849bdb50fed0f9a2da28ec20e0932e0128ca8756c8ce5f4ab92b4 +size 1614941 diff --git a/output/lora_rank_128_mlp_H200/step_vis/train_0_step_015999.png b/output/lora_rank_128_mlp_H200/step_vis/train_0_step_015999.png new file mode 100644 index 0000000000000000000000000000000000000000..e0eaa71693e742578c6f3edcc836e463c5da31f2 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/train_0_step_015999.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7fbb4ae8207d2564a9c34ba2a1392d07f3586c355fa8c29c2c55916a5713bce2 +size 1827207 diff --git a/output/lora_rank_128_mlp_H200/step_vis/train_0_step_015999_sample00.png b/output/lora_rank_128_mlp_H200/step_vis/train_0_step_015999_sample00.png new file mode 100644 index 0000000000000000000000000000000000000000..098fb0c334fb6bba19c78fcd9f8d107174f5b994 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/train_0_step_015999_sample00.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:44402d16c539157547438484b3923d943061a7e248b34f20fdaf9594a9459859 +size 1845127 diff --git a/output/lora_rank_128_mlp_H200/step_vis/train_0_step_015999_sample01.png b/output/lora_rank_128_mlp_H200/step_vis/train_0_step_015999_sample01.png new file mode 100644 index 0000000000000000000000000000000000000000..01f096f66b7f008273d142f44a73fd622ffcf6cc --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/train_0_step_015999_sample01.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2555e2d59873054b335e0602fad30762e80e4ad7639b6959d65ebd9cff7bed23 +size 1922396 diff --git a/output/lora_rank_128_mlp_H200/step_vis/train_0_step_015999_sample02.png b/output/lora_rank_128_mlp_H200/step_vis/train_0_step_015999_sample02.png new file mode 100644 index 0000000000000000000000000000000000000000..12aa0bfc8b7e1cc9496aa77c5e987f3a409b255f --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/train_0_step_015999_sample02.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:58fd717471e2dfd1ff7fc7862046d28effbc62b358b7242becaaac122ec75a62 +size 1815373 diff --git a/output/lora_rank_128_mlp_H200/step_vis/train_0_step_015999_sample03.png b/output/lora_rank_128_mlp_H200/step_vis/train_0_step_015999_sample03.png new file mode 100644 index 0000000000000000000000000000000000000000..4271e6bd5643d7ba9d1015badfc08869d4afef7b --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/train_0_step_015999_sample03.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d51cf15600d833c6510e5cb856fd2e32ea8a2f0b389ca8c051135dd62ab6c781 +size 1959849 diff --git a/output/lora_rank_128_mlp_H200/step_vis/train_0_step_015999_sample04.png b/output/lora_rank_128_mlp_H200/step_vis/train_0_step_015999_sample04.png new file mode 100644 index 0000000000000000000000000000000000000000..b4048bc656216364d2f1be1ce9d7ac43202c753e --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/train_0_step_015999_sample04.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:40d35e91bb1e8e265ef975feed677e287762a59bdeb9629d38a99ced8cd05bd4 +size 1677960 diff --git a/output/lora_rank_128_mlp_H200/step_vis/train_0_step_017999.png b/output/lora_rank_128_mlp_H200/step_vis/train_0_step_017999.png new file mode 100644 index 0000000000000000000000000000000000000000..8cc00c79722909c67532990cf8a382d620e1b5f2 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/train_0_step_017999.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f669567437bae0a3e954f17886e07bfe3d50eddb670fbd3b9d05eada428b1041 +size 1820171 diff --git a/output/lora_rank_128_mlp_H200/step_vis/train_0_step_017999_sample00.png b/output/lora_rank_128_mlp_H200/step_vis/train_0_step_017999_sample00.png new file mode 100644 index 0000000000000000000000000000000000000000..48b5d2cdc8539212be34d47a7e928cb6f66d02a4 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/train_0_step_017999_sample00.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0bb0c98ec776f82f769951f28b923e44625ed16c78d3d6de2d419c911c9eefb5 +size 1876783 diff --git a/output/lora_rank_128_mlp_H200/step_vis/train_0_step_017999_sample01.png b/output/lora_rank_128_mlp_H200/step_vis/train_0_step_017999_sample01.png new file mode 100644 index 0000000000000000000000000000000000000000..34ff49b343365cb548220dae78aad0dae48198d3 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/train_0_step_017999_sample01.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dd921894792882c65ce65a4516be76b01272e5e76a723fa0d5912148b2f47cfd +size 1948126 diff --git a/output/lora_rank_128_mlp_H200/step_vis/train_0_step_017999_sample02.png b/output/lora_rank_128_mlp_H200/step_vis/train_0_step_017999_sample02.png new file mode 100644 index 0000000000000000000000000000000000000000..38282d70f29f65a436d59da286388d4f8ca35832 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/train_0_step_017999_sample02.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e036bd7dfc3316c15a800c3e3b881aef1a8009a2afaa6a5f18426e3d3339020f +size 1765720 diff --git a/output/lora_rank_128_mlp_H200/step_vis/train_0_step_017999_sample03.png b/output/lora_rank_128_mlp_H200/step_vis/train_0_step_017999_sample03.png new file mode 100644 index 0000000000000000000000000000000000000000..91872acc3cdfa824f2a5b2612650ac232a31f3fc --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/train_0_step_017999_sample03.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:22b98975b98ed85d83691060129858270c3d86c3d2597a5d020771762e542db2 +size 1839579 diff --git a/output/lora_rank_128_mlp_H200/step_vis/train_0_step_017999_sample04.png b/output/lora_rank_128_mlp_H200/step_vis/train_0_step_017999_sample04.png new file mode 100644 index 0000000000000000000000000000000000000000..8b032fdc3226ba6ee0feecd034e24f36a31ee6ea --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/train_0_step_017999_sample04.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ea7e21156a1b7a4bff73a779d7600ce0af3da7ac148d8d688c0d3f8e35a451b9 +size 1604125 diff --git a/output/lora_rank_128_mlp_H200/step_vis/train_0_step_019999.png b/output/lora_rank_128_mlp_H200/step_vis/train_0_step_019999.png new file mode 100644 index 0000000000000000000000000000000000000000..f537567965892fba41b63ae5396db112069a32b8 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/train_0_step_019999.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4d06dd956d63600c785a93172219d51c46b0590128f87032d9014523ed5c73a0 +size 1811208 diff --git a/output/lora_rank_128_mlp_H200/step_vis/train_0_step_019999_sample00.png b/output/lora_rank_128_mlp_H200/step_vis/train_0_step_019999_sample00.png new file mode 100644 index 0000000000000000000000000000000000000000..1cfc568a19c3ce5a79b7ac70158d0d8609fc0609 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/train_0_step_019999_sample00.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:59ad579fbf6fddee827292a8651873e274eeb63efc5e2b576b016c4ce82a8677 +size 1835470 diff --git a/output/lora_rank_128_mlp_H200/step_vis/train_0_step_019999_sample01.png b/output/lora_rank_128_mlp_H200/step_vis/train_0_step_019999_sample01.png new file mode 100644 index 0000000000000000000000000000000000000000..21eb10547ce0cda8391c385ee8df84591d87c07a --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/train_0_step_019999_sample01.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:495cea5add32cb71ed5550c278a80a39d3e86a4b87482f5bce3a148013cb23eb +size 1914581 diff --git a/output/lora_rank_128_mlp_H200/step_vis/train_0_step_019999_sample02.png b/output/lora_rank_128_mlp_H200/step_vis/train_0_step_019999_sample02.png new file mode 100644 index 0000000000000000000000000000000000000000..caa97f6881fa3fb8d5bedaf2eecacb30f6d536cd --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/train_0_step_019999_sample02.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:00b4880a5ddb70cc3db8524015687d384b6a2be68a935c8eb79d129e96ea2891 +size 1855614 diff --git a/output/lora_rank_128_mlp_H200/step_vis/train_0_step_019999_sample03.png b/output/lora_rank_128_mlp_H200/step_vis/train_0_step_019999_sample03.png new file mode 100644 index 0000000000000000000000000000000000000000..8781a463b4168715429bb1bc0b12b6b254446e6d --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/train_0_step_019999_sample03.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:29d4b0ec88d6ae50915f400ce3ab87a3098574d6b0d27cb078cf96d14fa8edb8 +size 1882546 diff --git a/output/lora_rank_128_mlp_H200/step_vis/train_0_step_019999_sample04.png b/output/lora_rank_128_mlp_H200/step_vis/train_0_step_019999_sample04.png new file mode 100644 index 0000000000000000000000000000000000000000..02fb420b88d21680ec3448b8e2929a1530b2ade2 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/train_0_step_019999_sample04.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7b5afa3385de7157c93818ebb0a7cf7c34351920a96dc5b9a88b46fcc1181292 +size 1637201 diff --git a/output/lora_rank_128_mlp_H200/step_vis/train_1_step_000000.png b/output/lora_rank_128_mlp_H200/step_vis/train_1_step_000000.png new file mode 100644 index 0000000000000000000000000000000000000000..f0d97d0233be7e72eea7a5425fcfa19f5ac644a7 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/train_1_step_000000.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:507944a20d3892d6dab23e64c57b113abe164684dc2ba8a676d331fa3cbb6934 +size 2064228 diff --git a/output/lora_rank_128_mlp_H200/step_vis/train_1_step_000000_sample00.png b/output/lora_rank_128_mlp_H200/step_vis/train_1_step_000000_sample00.png new file mode 100644 index 0000000000000000000000000000000000000000..cd44d36ac377d9d9ff7a8eed01ad9295e02bc151 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/train_1_step_000000_sample00.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:84d520fd4383b0c80f0a84d357776bfcea8406e882011b93147300ee1eede31b +size 2680070 diff --git a/output/lora_rank_128_mlp_H200/step_vis/train_1_step_000000_sample01.png b/output/lora_rank_128_mlp_H200/step_vis/train_1_step_000000_sample01.png new file mode 100644 index 0000000000000000000000000000000000000000..906877cda5baf615945437eefbcd91cb4c46c28d --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/train_1_step_000000_sample01.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:96a3c9edd1fe78746c7eaf14f6c574b88c7628b9ffd7a1194a2ebbcfd19a813d +size 2716203 diff --git a/output/lora_rank_128_mlp_H200/step_vis/train_1_step_000000_sample02.png b/output/lora_rank_128_mlp_H200/step_vis/train_1_step_000000_sample02.png new file mode 100644 index 0000000000000000000000000000000000000000..431577ea96d9ed8f767b7a251dfa01a0b927e2be --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/train_1_step_000000_sample02.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:738617096b23b7438b845cf5ab4f386b4c01743e27160fc8bd7a36c66817951f +size 2740876 diff --git a/output/lora_rank_128_mlp_H200/step_vis/train_1_step_000000_sample03.png b/output/lora_rank_128_mlp_H200/step_vis/train_1_step_000000_sample03.png new file mode 100644 index 0000000000000000000000000000000000000000..799f8fde6202a48c8da74c9da3c1fa1e363e9ebb --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/train_1_step_000000_sample03.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f86c6754ccc6ae54ef2f33174269d45b2ea004450570b341dfa3408c86650349 +size 2735659 diff --git a/output/lora_rank_128_mlp_H200/step_vis/train_1_step_000000_sample04.png b/output/lora_rank_128_mlp_H200/step_vis/train_1_step_000000_sample04.png new file mode 100644 index 0000000000000000000000000000000000000000..cde3169b2e52f4f1eac897d4564ca2c867383a53 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/train_1_step_000000_sample04.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bd3a3fe3d064c1b220c223f8cfbe2fbed3d0e1e4b356da89aa1463a866ee4b73 +size 2747839 diff --git a/output/lora_rank_128_mlp_H200/step_vis/train_1_step_002000.png b/output/lora_rank_128_mlp_H200/step_vis/train_1_step_002000.png new file mode 100644 index 0000000000000000000000000000000000000000..8003f5d4efd6f555232bbfe7e5d0e97daafa57a3 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/train_1_step_002000.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:79b9ab780c34153c0f25c28facee64006e0f3749dcbf39aa31b7ccedb8049a47 +size 1883070 diff --git a/output/lora_rank_128_mlp_H200/step_vis/train_1_step_002000_sample00.png b/output/lora_rank_128_mlp_H200/step_vis/train_1_step_002000_sample00.png new file mode 100644 index 0000000000000000000000000000000000000000..1962381a58dc46baf8ecbe8fc5bcf5d886476eda --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/train_1_step_002000_sample00.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a07f9839434b7fcceb0fe2507b3e5bbc48017eb35145193501c03ef959a7ef6 +size 2021784 diff --git a/output/lora_rank_128_mlp_H200/step_vis/train_1_step_002000_sample01.png b/output/lora_rank_128_mlp_H200/step_vis/train_1_step_002000_sample01.png new file mode 100644 index 0000000000000000000000000000000000000000..b627a2c0f20732e6989b0e5676f873a705acc9f1 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/train_1_step_002000_sample01.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:873d264e00b7ffa72562249627544b52d05941e77c1ef2d8106de57b10613f38 +size 2027181 diff --git a/output/lora_rank_128_mlp_H200/step_vis/train_1_step_002000_sample02.png b/output/lora_rank_128_mlp_H200/step_vis/train_1_step_002000_sample02.png new file mode 100644 index 0000000000000000000000000000000000000000..8cceed791b50f83b74e43750066dfb7cbb2d727e --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/train_1_step_002000_sample02.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cf19226dac300091b1c0449887bfb38b20a5a7a0e922931e77b0833414ed9636 +size 1855429 diff --git a/output/lora_rank_128_mlp_H200/step_vis/train_1_step_002000_sample03.png b/output/lora_rank_128_mlp_H200/step_vis/train_1_step_002000_sample03.png new file mode 100644 index 0000000000000000000000000000000000000000..d6a3a694ae11d7d0dab93dce234ee5639615114a --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/train_1_step_002000_sample03.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5bc0ce1fcd1954f38f8b0000128a1a2d7fb9d936522bf6f6eff5f28625582d2c +size 2025300 diff --git a/output/lora_rank_128_mlp_H200/step_vis/train_1_step_002000_sample04.png b/output/lora_rank_128_mlp_H200/step_vis/train_1_step_002000_sample04.png new file mode 100644 index 0000000000000000000000000000000000000000..525d1bf692f6af4550e7c6febd7390f5bbc1e60f --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/train_1_step_002000_sample04.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d81ee63adef5cc12e42fac78dbc5597ff658c7e8ddc5b1a5920a2f537469af1d +size 1782668 diff --git a/output/lora_rank_128_mlp_H200/step_vis/train_1_step_004000.png b/output/lora_rank_128_mlp_H200/step_vis/train_1_step_004000.png new file mode 100644 index 0000000000000000000000000000000000000000..1d3d484200123402aeb9859d6a2e2ba0639e9f7c --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/train_1_step_004000.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7795862c5166c2d073df70df2c7c5ce8ad954bbf6628c5f256f588140da6e301 +size 1899982 diff --git a/output/lora_rank_128_mlp_H200/step_vis/train_1_step_004000_sample00.png b/output/lora_rank_128_mlp_H200/step_vis/train_1_step_004000_sample00.png new file mode 100644 index 0000000000000000000000000000000000000000..36872571941e741cc48b305859940e72fb94df33 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/train_1_step_004000_sample00.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:646843906ed1c5be72be7b7502f50d1efbb4ee6e09a6f450940bbf69fa11510b +size 1836755 diff --git a/output/lora_rank_128_mlp_H200/step_vis/train_1_step_004000_sample01.png b/output/lora_rank_128_mlp_H200/step_vis/train_1_step_004000_sample01.png new file mode 100644 index 0000000000000000000000000000000000000000..0ee5b1c61c33d188a4b1f8838910a1a82c554a6c --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/train_1_step_004000_sample01.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:54ee21f09d325bca038798f169ce8feb4c99902d2ada14591161102a2ea124d6 +size 2117697 diff --git a/output/lora_rank_128_mlp_H200/step_vis/train_1_step_004000_sample02.png b/output/lora_rank_128_mlp_H200/step_vis/train_1_step_004000_sample02.png new file mode 100644 index 0000000000000000000000000000000000000000..a36a1bf35f0ef1f03c0f379397ea142e6aa3bcfe --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/train_1_step_004000_sample02.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fd028b118386d63c8dde224a16a3569136afb1240d1ae45642658267c4317c15 +size 1808734 diff --git a/output/lora_rank_128_mlp_H200/step_vis/train_1_step_004000_sample03.png b/output/lora_rank_128_mlp_H200/step_vis/train_1_step_004000_sample03.png new file mode 100644 index 0000000000000000000000000000000000000000..1f6504573ff98ed448a984060aee088be89a38f2 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/train_1_step_004000_sample03.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:937c784378a70a364e253a3ca56fad7a10f06874b11bf54507e84f9521c02f32 +size 2124371 diff --git a/output/lora_rank_128_mlp_H200/step_vis/train_1_step_004000_sample04.png b/output/lora_rank_128_mlp_H200/step_vis/train_1_step_004000_sample04.png new file mode 100644 index 0000000000000000000000000000000000000000..3d8843438e669c01d0db1b1d30b1963130462919 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/train_1_step_004000_sample04.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4e0a472ed76900f9b1f8b4b882eecc00a0291c857aecdb1b94135f8d05835ff8 +size 1920061 diff --git a/output/lora_rank_128_mlp_H200/step_vis/train_1_step_006000.png b/output/lora_rank_128_mlp_H200/step_vis/train_1_step_006000.png new file mode 100644 index 0000000000000000000000000000000000000000..2e5fe8184004f59b9f764e14b72e387fa935d464 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/train_1_step_006000.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3fb4d0a48b238743a38ae57fb1213902b3ae70d65145bb33b38db1e02dc927ed +size 1893788 diff --git a/output/lora_rank_128_mlp_H200/step_vis/train_1_step_006000_sample00.png b/output/lora_rank_128_mlp_H200/step_vis/train_1_step_006000_sample00.png new file mode 100644 index 0000000000000000000000000000000000000000..44ad6362cbe3f6ce9d9833655c593c7325a1a487 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/train_1_step_006000_sample00.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2212d121cc26954646d7887a4e444aa2a8399af868a6f447c9e8982fc9082a0a +size 1904240 diff --git a/output/lora_rank_128_mlp_H200/step_vis/train_1_step_006000_sample01.png b/output/lora_rank_128_mlp_H200/step_vis/train_1_step_006000_sample01.png new file mode 100644 index 0000000000000000000000000000000000000000..1f876bbe8d57f64d55ccb30d31ad87bad4713017 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/train_1_step_006000_sample01.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:24ab36832981b15d0b20bd1de05b2d1e2180fbcf6004e1977ff98f4270ef89f0 +size 2037027 diff --git a/output/lora_rank_128_mlp_H200/step_vis/train_1_step_006000_sample02.png b/output/lora_rank_128_mlp_H200/step_vis/train_1_step_006000_sample02.png new file mode 100644 index 0000000000000000000000000000000000000000..81927908229d9e871f4ee7f3f101742c2ec8f621 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/train_1_step_006000_sample02.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eff5b922e116d65c2f2b6d7a7bfab4f7317639c02001b9ece1b96c37e74b1f87 +size 1812554 diff --git a/output/lora_rank_128_mlp_H200/step_vis/train_1_step_006000_sample03.png b/output/lora_rank_128_mlp_H200/step_vis/train_1_step_006000_sample03.png new file mode 100644 index 0000000000000000000000000000000000000000..a6b05e6c0cbd1270741accac4423b7d77dff4b80 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/train_1_step_006000_sample03.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:500c5e453a0499ea36506626b7ca2446dff46c576fbdeadac2d7fc38a3fd53fc +size 1943750 diff --git a/output/lora_rank_128_mlp_H200/step_vis/train_1_step_006000_sample04.png b/output/lora_rank_128_mlp_H200/step_vis/train_1_step_006000_sample04.png new file mode 100644 index 0000000000000000000000000000000000000000..8c0a4c99d19e71eb418891d1a437f07512683ebc --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/train_1_step_006000_sample04.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a3ffba37fc39f9293d07a5751e286009ce3cb85b40e3787740898424a0d86aba +size 1847502 diff --git a/output/lora_rank_128_mlp_H200/step_vis/train_1_step_008000.png b/output/lora_rank_128_mlp_H200/step_vis/train_1_step_008000.png new file mode 100644 index 0000000000000000000000000000000000000000..5d06ed0948f890185890ad3f80a330dc771f1750 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/train_1_step_008000.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:addfb0cbd69c573afe9c0f509fc050d0399b692cd1e0c4e15bf81e71f87cc3bb +size 1890605 diff --git a/output/lora_rank_128_mlp_H200/step_vis/train_1_step_008000_sample00.png b/output/lora_rank_128_mlp_H200/step_vis/train_1_step_008000_sample00.png new file mode 100644 index 0000000000000000000000000000000000000000..a0d1547a75aab5e7bcc835c5a5933570a8ac5237 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/train_1_step_008000_sample00.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9e51963abfb4200bcbe89a94687eab2249bf6e8892424c481db2ed084b31c4cd +size 1817654 diff --git a/output/lora_rank_128_mlp_H200/step_vis/train_1_step_008000_sample01.png b/output/lora_rank_128_mlp_H200/step_vis/train_1_step_008000_sample01.png new file mode 100644 index 0000000000000000000000000000000000000000..c1b8e764ec360d291cc9897dfb57c1275fc12ff3 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/train_1_step_008000_sample01.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:40aea1a3f72e5dc87f626dc054117e227e066ab38e39f3733f07de84cd4e48a1 +size 1906472 diff --git a/output/lora_rank_128_mlp_H200/step_vis/train_1_step_008000_sample02.png b/output/lora_rank_128_mlp_H200/step_vis/train_1_step_008000_sample02.png new file mode 100644 index 0000000000000000000000000000000000000000..b096a992565243897bfbf4f2e443050b6c120703 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/train_1_step_008000_sample02.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:65945152e1769dfee13c001206e413560256a41f956c61bb59c3ac7f603ea9ad +size 1926834 diff --git a/output/lora_rank_128_mlp_H200/step_vis/train_1_step_008000_sample03.png b/output/lora_rank_128_mlp_H200/step_vis/train_1_step_008000_sample03.png new file mode 100644 index 0000000000000000000000000000000000000000..1e83ae6ce2b9c4a23928feefb7663853f1100cf0 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/train_1_step_008000_sample03.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1bac29ff37926f4ab200e5de33d8999f82b3212be647137d6b8b2fe7ebbbf774 +size 2019006 diff --git a/output/lora_rank_128_mlp_H200/step_vis/train_1_step_008000_sample04.png b/output/lora_rank_128_mlp_H200/step_vis/train_1_step_008000_sample04.png new file mode 100644 index 0000000000000000000000000000000000000000..b8a30f19c2c1a91c7e6a5f7f4f8647211429dba8 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/train_1_step_008000_sample04.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a45b7a2f7bbb84523bdccdb1d7f613864e6af90e76924e904a3f9ba9a37e7eb5 +size 1862739 diff --git a/output/lora_rank_128_mlp_H200/step_vis/train_1_step_010000.png b/output/lora_rank_128_mlp_H200/step_vis/train_1_step_010000.png new file mode 100644 index 0000000000000000000000000000000000000000..9bffc1a839775819aac8b25d8c6e7d18a55bb2a3 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/train_1_step_010000.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8b91ac639c5e930b1bd92cc8d712dcac54ac86a89f187b18a902c4b74ac4c52e +size 1885680 diff --git a/output/lora_rank_128_mlp_H200/step_vis/train_1_step_010000_sample00.png b/output/lora_rank_128_mlp_H200/step_vis/train_1_step_010000_sample00.png new file mode 100644 index 0000000000000000000000000000000000000000..e95b19dbbc65c76e8ebb9c99259bf37dd2a291e4 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/train_1_step_010000_sample00.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:04faa2c94e9c64edbe81c9086f6c7ea718f9d0fac9ee1bd37fb40948966c4d96 +size 1851226 diff --git a/output/lora_rank_128_mlp_H200/step_vis/train_1_step_010000_sample01.png b/output/lora_rank_128_mlp_H200/step_vis/train_1_step_010000_sample01.png new file mode 100644 index 0000000000000000000000000000000000000000..db66deaf4e3a7ae601d1910692cc9b3c6f307312 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/train_1_step_010000_sample01.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:beb80ca8af67e059e7c6944499f83336a03fcae811a610ab69d907f449909b9f +size 1952812 diff --git a/output/lora_rank_128_mlp_H200/step_vis/train_1_step_010000_sample02.png b/output/lora_rank_128_mlp_H200/step_vis/train_1_step_010000_sample02.png new file mode 100644 index 0000000000000000000000000000000000000000..f3f5d00552da7f307e67de1bafe599870ba356ff --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/train_1_step_010000_sample02.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1747dc34cbb87fc0937d44d850fdd367913c029ae63b36ecdb4222e2b26be600 +size 1778457 diff --git a/output/lora_rank_128_mlp_H200/step_vis/train_1_step_010000_sample03.png b/output/lora_rank_128_mlp_H200/step_vis/train_1_step_010000_sample03.png new file mode 100644 index 0000000000000000000000000000000000000000..0a973d6a2f1bf779823917f070c63847fdbe323d --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/train_1_step_010000_sample03.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:24f214cbef88d715e9ab4828f0218c4e9c42dd4b1eb169d36635a3c1025e56d4 +size 1981825 diff --git a/output/lora_rank_128_mlp_H200/step_vis/train_1_step_010000_sample04.png b/output/lora_rank_128_mlp_H200/step_vis/train_1_step_010000_sample04.png new file mode 100644 index 0000000000000000000000000000000000000000..f0a9851edee300ab19dc81ac510675545da11a67 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/train_1_step_010000_sample04.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:60c01c1dac184a2c465e4a61b7ba0a333177d0886c56d7a46b67447723194f81 +size 1743701 diff --git a/output/lora_rank_128_mlp_H200/step_vis/train_1_step_011999.png b/output/lora_rank_128_mlp_H200/step_vis/train_1_step_011999.png new file mode 100644 index 0000000000000000000000000000000000000000..74288fbf23e087224116190675fd8a4700a03ad2 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/train_1_step_011999.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9e1e6f510f0a4fa09a07bfc6e9193888a15a866dcc76e49d3f5a816b0c1b499b +size 1886226 diff --git a/output/lora_rank_128_mlp_H200/step_vis/train_1_step_011999_sample00.png b/output/lora_rank_128_mlp_H200/step_vis/train_1_step_011999_sample00.png new file mode 100644 index 0000000000000000000000000000000000000000..046074bf67dd2643ae9624a86a62f3baaa6a2469 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/train_1_step_011999_sample00.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:59fbf86774326a70b4e76d673ddbf46cf99351815f90c9b0e433b912030b96c9 +size 1815352 diff --git a/output/lora_rank_128_mlp_H200/step_vis/train_1_step_011999_sample01.png b/output/lora_rank_128_mlp_H200/step_vis/train_1_step_011999_sample01.png new file mode 100644 index 0000000000000000000000000000000000000000..bacbd91bcf51cc6484369a3fd4e5b7704ff520c1 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/train_1_step_011999_sample01.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:97d855cd988bb15a537ee92e26ae5fe00cbd7714a19f13d4fc116d16d5ed9fc1 +size 1905567 diff --git a/output/lora_rank_128_mlp_H200/step_vis/train_1_step_011999_sample02.png b/output/lora_rank_128_mlp_H200/step_vis/train_1_step_011999_sample02.png new file mode 100644 index 0000000000000000000000000000000000000000..918c60fe70c38bddbdd2de22da8d62de0e922630 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/train_1_step_011999_sample02.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b56264c4e33ad76f956ee1e029135661211ab8b3079154e7be3c2d6db56ac8f9 +size 1882375 diff --git a/output/lora_rank_128_mlp_H200/step_vis/train_1_step_011999_sample03.png b/output/lora_rank_128_mlp_H200/step_vis/train_1_step_011999_sample03.png new file mode 100644 index 0000000000000000000000000000000000000000..ee45ea7c6aaeddcb74c4df7ff5aeb9f291e6de4b --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/train_1_step_011999_sample03.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:52817547571e1a3db8a3fa3223ba9e7a41ef18629bc4128eedc24e2633f3b661 +size 1929259 diff --git a/output/lora_rank_128_mlp_H200/step_vis/train_1_step_011999_sample04.png b/output/lora_rank_128_mlp_H200/step_vis/train_1_step_011999_sample04.png new file mode 100644 index 0000000000000000000000000000000000000000..9ef2903935ddb759ba4c89c27d951edf814032c1 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/train_1_step_011999_sample04.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b3e1df80953e1198b2b269c8e91d76562919ba67590fd3e950b184a55d7bb91c +size 1751848 diff --git a/output/lora_rank_128_mlp_H200/step_vis/train_1_step_013999.png b/output/lora_rank_128_mlp_H200/step_vis/train_1_step_013999.png new file mode 100644 index 0000000000000000000000000000000000000000..ad715301162c63956f1ef0819a475d4d9db9c281 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/train_1_step_013999.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:185f3fba2091071c19dca72b6c878209e3cd200abcaa756d62ec50713f93ceb6 +size 1896245 diff --git a/output/lora_rank_128_mlp_H200/step_vis/train_1_step_013999_sample00.png b/output/lora_rank_128_mlp_H200/step_vis/train_1_step_013999_sample00.png new file mode 100644 index 0000000000000000000000000000000000000000..ae757b8d6d9b0a2167c9356bccb3eaff8f4dc2a9 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/train_1_step_013999_sample00.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1b5659e27411965bfd15f9fdeec48858b53c10e4facd46adbb5d410a6d8a3fb3 +size 1854920 diff --git a/output/lora_rank_128_mlp_H200/step_vis/train_1_step_013999_sample01.png b/output/lora_rank_128_mlp_H200/step_vis/train_1_step_013999_sample01.png new file mode 100644 index 0000000000000000000000000000000000000000..3d92f227a21e58ace6540e8b4c3df8371d7876b3 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/train_1_step_013999_sample01.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:639bfe7b66b9f953a1546a465596f19b049d596bf160a08dd9e03dbf8531290b +size 2056218 diff --git a/output/lora_rank_128_mlp_H200/step_vis/train_1_step_013999_sample02.png b/output/lora_rank_128_mlp_H200/step_vis/train_1_step_013999_sample02.png new file mode 100644 index 0000000000000000000000000000000000000000..65c8522e5e257059af44b85c641213520e381bff --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/train_1_step_013999_sample02.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2094d951cd12111e48527b895358bce144f4bcaaa8b216bb9175516b98cc27c1 +size 1885310 diff --git a/output/lora_rank_128_mlp_H200/step_vis/train_1_step_013999_sample03.png b/output/lora_rank_128_mlp_H200/step_vis/train_1_step_013999_sample03.png new file mode 100644 index 0000000000000000000000000000000000000000..59465d05e1225f8a0aa0c88590425133b8debf18 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/train_1_step_013999_sample03.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3c35949748c451ce24dfe184b7047626756e2dc70dfa294f611cc48ff33edc47 +size 1944662 diff --git a/output/lora_rank_128_mlp_H200/step_vis/train_1_step_013999_sample04.png b/output/lora_rank_128_mlp_H200/step_vis/train_1_step_013999_sample04.png new file mode 100644 index 0000000000000000000000000000000000000000..ae6d5b678abb3c4ef194cd7d607243de16c9acaa --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/train_1_step_013999_sample04.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ba58bd21ae706765b459f07e25167c51a7ffaac31ad057be650294411bd725d4 +size 1840062 diff --git a/output/lora_rank_128_mlp_H200/step_vis/train_1_step_015999.png b/output/lora_rank_128_mlp_H200/step_vis/train_1_step_015999.png new file mode 100644 index 0000000000000000000000000000000000000000..af80d2d9f0dbf98b05aece978b2cbce1d671fe50 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/train_1_step_015999.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ef2fe0df669e5fcac2a7edb9976cc7dc8228026e2b21b001ed748176a234ae40 +size 1884221 diff --git a/output/lora_rank_128_mlp_H200/step_vis/train_1_step_015999_sample00.png b/output/lora_rank_128_mlp_H200/step_vis/train_1_step_015999_sample00.png new file mode 100644 index 0000000000000000000000000000000000000000..4f2db9a416a2e095fd25c839d397db5419afa648 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/train_1_step_015999_sample00.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f1021e32968e4b25b4c11a3c088cd42143a0b206a2644cc4f3755d6eb3af35ac +size 1836877 diff --git a/output/lora_rank_128_mlp_H200/step_vis/train_1_step_015999_sample01.png b/output/lora_rank_128_mlp_H200/step_vis/train_1_step_015999_sample01.png new file mode 100644 index 0000000000000000000000000000000000000000..a0c848a634e891aabd5a9e13d68f698cba43f31f --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/train_1_step_015999_sample01.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8c5a0cefb0241c1a9a6d52d4fab798ee16ac181e9ea01fc39a0e212dace83481 +size 1893171 diff --git a/output/lora_rank_128_mlp_H200/step_vis/train_1_step_015999_sample02.png b/output/lora_rank_128_mlp_H200/step_vis/train_1_step_015999_sample02.png new file mode 100644 index 0000000000000000000000000000000000000000..cb5bf5e86ef3bad4f85e129fd4a49586cddfec56 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/train_1_step_015999_sample02.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a35d5fc4cf8e3af6be2ea83437b3d9e5ed3c478f0068399a903c7de494b8c2c7 +size 1896697 diff --git a/output/lora_rank_128_mlp_H200/step_vis/train_1_step_015999_sample03.png b/output/lora_rank_128_mlp_H200/step_vis/train_1_step_015999_sample03.png new file mode 100644 index 0000000000000000000000000000000000000000..8f83513fb0f9aed6eb83cd214359056e8862af2f --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/train_1_step_015999_sample03.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d42e8c2976293430bb2a0c2dfdffc03bde77ee35c9107caa687e36d3ef8d7a14 +size 1846363 diff --git a/output/lora_rank_128_mlp_H200/step_vis/train_1_step_015999_sample04.png b/output/lora_rank_128_mlp_H200/step_vis/train_1_step_015999_sample04.png new file mode 100644 index 0000000000000000000000000000000000000000..0233c26cc48dde90e0cb866f7c65682e6ef6800a --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/train_1_step_015999_sample04.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:60fe915bab6c35300ca5bddee158c6fddbe30f79a9249b741c68beeecc63f9a7 +size 1841389 diff --git a/output/lora_rank_128_mlp_H200/step_vis/train_1_step_017999.png b/output/lora_rank_128_mlp_H200/step_vis/train_1_step_017999.png new file mode 100644 index 0000000000000000000000000000000000000000..684c3a9d3039914d33eb1fc53a4be196ade64d00 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/train_1_step_017999.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:57da791b3a28cb6fdb4f9571eee915f415b9ca6cb13d11f95537e287bec688a4 +size 1902146 diff --git a/output/lora_rank_128_mlp_H200/step_vis/train_1_step_017999_sample00.png b/output/lora_rank_128_mlp_H200/step_vis/train_1_step_017999_sample00.png new file mode 100644 index 0000000000000000000000000000000000000000..d601210e10b3c36c0fe59466d960650258e4752b --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/train_1_step_017999_sample00.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:229f9a9c8ebfa6f2c7a386878937e0664bb3975d5e549c88a703019040b36d56 +size 1952924 diff --git a/output/lora_rank_128_mlp_H200/step_vis/train_1_step_017999_sample01.png b/output/lora_rank_128_mlp_H200/step_vis/train_1_step_017999_sample01.png new file mode 100644 index 0000000000000000000000000000000000000000..ac36a0f16f3c96dd0b3ae35e500ec23d08d0c2e4 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/train_1_step_017999_sample01.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f499ce77a54f7c4ff50e9251506f7c474ceed4076ac73d084ecffeb18c9934f5 +size 1875940 diff --git a/output/lora_rank_128_mlp_H200/step_vis/train_1_step_017999_sample02.png b/output/lora_rank_128_mlp_H200/step_vis/train_1_step_017999_sample02.png new file mode 100644 index 0000000000000000000000000000000000000000..a53739a0fc0c909a8f1c72b9708da9cafa397117 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/train_1_step_017999_sample02.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2ac9ea68ca3a06111f6c40fe30c1e6c37e4e0796cb111fd74943c5601eacac21 +size 1898375 diff --git a/output/lora_rank_128_mlp_H200/step_vis/train_1_step_017999_sample03.png b/output/lora_rank_128_mlp_H200/step_vis/train_1_step_017999_sample03.png new file mode 100644 index 0000000000000000000000000000000000000000..9e2d607271c9e422f65e066589827db378575595 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/train_1_step_017999_sample03.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:67877dc288547885b61c6288cd598960869da107c9d82a351c00daa089933b8a +size 1929732 diff --git a/output/lora_rank_128_mlp_H200/step_vis/train_1_step_017999_sample04.png b/output/lora_rank_128_mlp_H200/step_vis/train_1_step_017999_sample04.png new file mode 100644 index 0000000000000000000000000000000000000000..d3f07acf357878d873d8d61f5f3459c552973704 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/train_1_step_017999_sample04.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7d56ccff32dc1801fe701257eb1847583dafeb25a54f1be1f15e93956a622339 +size 1865665 diff --git a/output/lora_rank_128_mlp_H200/step_vis/train_1_step_019999.png b/output/lora_rank_128_mlp_H200/step_vis/train_1_step_019999.png new file mode 100644 index 0000000000000000000000000000000000000000..7be2c7f79e4dcd27fa0623daa697020b249e6291 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/train_1_step_019999.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b600e12dca0b7feb456ee64f08993688bd5b5a295ca38e41be9f0df8171e0b83 +size 1888022 diff --git a/output/lora_rank_128_mlp_H200/step_vis/train_1_step_019999_sample00.png b/output/lora_rank_128_mlp_H200/step_vis/train_1_step_019999_sample00.png new file mode 100644 index 0000000000000000000000000000000000000000..67f420613ac8ace6f5babbf786b93cfce3205a33 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/train_1_step_019999_sample00.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2db3a4eea0daca7a9115eba33d255c6c784f4518ed87b8d8e25644b25fa0358c +size 1777466 diff --git a/output/lora_rank_128_mlp_H200/step_vis/train_1_step_019999_sample01.png b/output/lora_rank_128_mlp_H200/step_vis/train_1_step_019999_sample01.png new file mode 100644 index 0000000000000000000000000000000000000000..5d237b7c2b4b545a3f7d0394ca043d1fbbda0e93 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/train_1_step_019999_sample01.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:912f0352326b281b7db269bd2ebb118e61d18b39ad2e56c5cc3b0c8b584e5455 +size 1996560 diff --git a/output/lora_rank_128_mlp_H200/step_vis/train_1_step_019999_sample02.png b/output/lora_rank_128_mlp_H200/step_vis/train_1_step_019999_sample02.png new file mode 100644 index 0000000000000000000000000000000000000000..7980c5989e48c3e500752ab3255576264b6f2830 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/train_1_step_019999_sample02.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3989c949585aaa67c42da82bdd45ccf0922a9648b61d5ddcf98c529e80cb656c +size 1913997 diff --git a/output/lora_rank_128_mlp_H200/step_vis/train_1_step_019999_sample03.png b/output/lora_rank_128_mlp_H200/step_vis/train_1_step_019999_sample03.png new file mode 100644 index 0000000000000000000000000000000000000000..07a21911d3fcbe12e428a01f11a250743dca91bd --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/train_1_step_019999_sample03.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:136f04bbb42f36286ab9b7a0c2bc1a3e491d26d18ff2956e00610cb60741eb09 +size 2003189 diff --git a/output/lora_rank_128_mlp_H200/step_vis/train_1_step_019999_sample04.png b/output/lora_rank_128_mlp_H200/step_vis/train_1_step_019999_sample04.png new file mode 100644 index 0000000000000000000000000000000000000000..ff971eccf32cd9e1de43778fcafe76e8f1dad8da --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/train_1_step_019999_sample04.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:247dda79accfd94b496b4366010532bb54f692e82741df3f47d16aa0293cd6f5 +size 1782070 diff --git a/output/lora_rank_128_mlp_H200/step_vis/val_0_step_000000.png b/output/lora_rank_128_mlp_H200/step_vis/val_0_step_000000.png new file mode 100644 index 0000000000000000000000000000000000000000..81a5957f0a94fe9ec8eedf4b691de4f9e0844f68 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/val_0_step_000000.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1783b4c9c90abd9abe9d1dda004a2f289ee84fc61b5a83eeae1804242ed91c87 +size 1903438 diff --git a/output/lora_rank_128_mlp_H200/step_vis/val_0_step_000000_sample00.png b/output/lora_rank_128_mlp_H200/step_vis/val_0_step_000000_sample00.png new file mode 100644 index 0000000000000000000000000000000000000000..cf20aea61a9290e3e69f88486a68c61ce812cdf2 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/val_0_step_000000_sample00.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a7480788a4b21f2015e6ffd94b99a1ba74bfdcc32f06f1b5060d79652c3ce000 +size 2697533 diff --git a/output/lora_rank_128_mlp_H200/step_vis/val_0_step_000000_sample01.png b/output/lora_rank_128_mlp_H200/step_vis/val_0_step_000000_sample01.png new file mode 100644 index 0000000000000000000000000000000000000000..6a34d379c4c993d03c28606eff8bf260db854c21 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/val_0_step_000000_sample01.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f1047876395618bd6a18f9e5b1265079cf36119163a60fdaf6b08fe5afb71cfd +size 2732534 diff --git a/output/lora_rank_128_mlp_H200/step_vis/val_0_step_000000_sample02.png b/output/lora_rank_128_mlp_H200/step_vis/val_0_step_000000_sample02.png new file mode 100644 index 0000000000000000000000000000000000000000..14c07c241110087357e76a9228c988849462a7ea --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/val_0_step_000000_sample02.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4a75759a70682f5f93e549b896f11d01642189020471f7ecd43734c126a84a92 +size 2784570 diff --git a/output/lora_rank_128_mlp_H200/step_vis/val_0_step_000000_sample03.png b/output/lora_rank_128_mlp_H200/step_vis/val_0_step_000000_sample03.png new file mode 100644 index 0000000000000000000000000000000000000000..2cb7841066610fbfeb3f7cdde9b1f75d24461540 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/val_0_step_000000_sample03.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8449612affad3bdc0e44ff80c03f9f0cef195c8aeb2ed423a9ad18e5f2c2a8de +size 2732719 diff --git a/output/lora_rank_128_mlp_H200/step_vis/val_0_step_000000_sample04.png b/output/lora_rank_128_mlp_H200/step_vis/val_0_step_000000_sample04.png new file mode 100644 index 0000000000000000000000000000000000000000..fc5c409f530ba7434e7e74ce6ccf68bb83759194 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/val_0_step_000000_sample04.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f9b6a1696365e76ed13203875fd45b90ccbd76b9d460030f2d120e065c0bb096 +size 2749143 diff --git a/output/lora_rank_128_mlp_H200/step_vis/val_0_step_002000.png b/output/lora_rank_128_mlp_H200/step_vis/val_0_step_002000.png new file mode 100644 index 0000000000000000000000000000000000000000..5740b6182270c27b841e6556d7066961ed49cb29 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/val_0_step_002000.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9c50957c2b2c0511a087d7d08dc348cc49315b29050e82bf4910c4601414c615 +size 1666486 diff --git a/output/lora_rank_128_mlp_H200/step_vis/val_0_step_002000_sample00.png b/output/lora_rank_128_mlp_H200/step_vis/val_0_step_002000_sample00.png new file mode 100644 index 0000000000000000000000000000000000000000..93e1f27469766dc2f8554e267588fa8d981e248f --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/val_0_step_002000_sample00.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b6066bf3e3e5e6b3594163d4c47589fcef6857824dc4db5a4e446c9a3c494e9d +size 1887987 diff --git a/output/lora_rank_128_mlp_H200/step_vis/val_0_step_002000_sample01.png b/output/lora_rank_128_mlp_H200/step_vis/val_0_step_002000_sample01.png new file mode 100644 index 0000000000000000000000000000000000000000..060dac5b8183c96feb84754d73843e0a47cdcb96 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/val_0_step_002000_sample01.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:082746c6b599a38eb50c2f809a31c1468353beca748e6001bdb509540dce6d33 +size 2034849 diff --git a/output/lora_rank_128_mlp_H200/step_vis/val_0_step_002000_sample02.png b/output/lora_rank_128_mlp_H200/step_vis/val_0_step_002000_sample02.png new file mode 100644 index 0000000000000000000000000000000000000000..23defad266f5f64b59cda5ed41b5de833fa23adb --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/val_0_step_002000_sample02.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0c10619847bb420c5249a5832dc063732cc366663011d34c26a7b810feb94860 +size 1679304 diff --git a/output/lora_rank_128_mlp_H200/step_vis/val_0_step_002000_sample03.png b/output/lora_rank_128_mlp_H200/step_vis/val_0_step_002000_sample03.png new file mode 100644 index 0000000000000000000000000000000000000000..2e375581a6afb18fbe0eab666268e42769e30c88 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/val_0_step_002000_sample03.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b8cb85eccb0306bd401caf346607fc39ecadf9a73e19f106f728b2de93fd47f +size 1701716 diff --git a/output/lora_rank_128_mlp_H200/step_vis/val_0_step_002000_sample04.png b/output/lora_rank_128_mlp_H200/step_vis/val_0_step_002000_sample04.png new file mode 100644 index 0000000000000000000000000000000000000000..47cdee9e5a4ab038cf959c5cf2fa84bea89c3f44 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/val_0_step_002000_sample04.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:63e1ce285376e0a7b0f654e2f10a53d7d005323d8bb8b52a7b22f7b6afdda1e1 +size 1840541 diff --git a/output/lora_rank_128_mlp_H200/step_vis/val_0_step_004000.png b/output/lora_rank_128_mlp_H200/step_vis/val_0_step_004000.png new file mode 100644 index 0000000000000000000000000000000000000000..064b8da65fe25564a69144768dee2106f8c8946b --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/val_0_step_004000.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:201b96a93dd8bbce9480a27453ccb511f24e63da4e7e8e2fee7583ec001b489c +size 1678188 diff --git a/output/lora_rank_128_mlp_H200/step_vis/val_0_step_004000_sample00.png b/output/lora_rank_128_mlp_H200/step_vis/val_0_step_004000_sample00.png new file mode 100644 index 0000000000000000000000000000000000000000..2142eae924b81d015ed4e4eb3a522c1f5149c0ba --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/val_0_step_004000_sample00.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:81167f8a4b56343242527c01dffb5cddd4166fb905ccf8f9a3aa0ca4a8639401 +size 1761040 diff --git a/output/lora_rank_128_mlp_H200/step_vis/val_0_step_004000_sample01.png b/output/lora_rank_128_mlp_H200/step_vis/val_0_step_004000_sample01.png new file mode 100644 index 0000000000000000000000000000000000000000..c0d7f34d8a3a1af9cfd006dea6c87f4b316e6442 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/val_0_step_004000_sample01.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a94846526be29bf923af65f543e2b2af3610ec311aeab4e4a910a8ae11f7cbd8 +size 1736409 diff --git a/output/lora_rank_128_mlp_H200/step_vis/val_0_step_004000_sample02.png b/output/lora_rank_128_mlp_H200/step_vis/val_0_step_004000_sample02.png new file mode 100644 index 0000000000000000000000000000000000000000..6818cb46d99982b2e3abc97a9692241193a75f21 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/val_0_step_004000_sample02.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5d5f89799dc496ea4837b8ac11cafef5cb0ea46bfac9f6dac8cc302eebe27be0 +size 1798411 diff --git a/output/lora_rank_128_mlp_H200/step_vis/val_0_step_004000_sample03.png b/output/lora_rank_128_mlp_H200/step_vis/val_0_step_004000_sample03.png new file mode 100644 index 0000000000000000000000000000000000000000..a54e41628b33424920911b31a14b3293c92d6ebd --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/val_0_step_004000_sample03.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0e24f872435f72c07ee3acf983b882ca399cd7fe531ef8aac67604e3bb9c6ebf +size 1839798 diff --git a/output/lora_rank_128_mlp_H200/step_vis/val_0_step_004000_sample04.png b/output/lora_rank_128_mlp_H200/step_vis/val_0_step_004000_sample04.png new file mode 100644 index 0000000000000000000000000000000000000000..9cb3e842ac93dcda2a71dd43b4807bc169547072 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/val_0_step_004000_sample04.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:78f77e2585a4d8b5127fab4e123a7b7e2f23438504b8e368e2ea8bd7c00f6911 +size 1857567 diff --git a/output/lora_rank_128_mlp_H200/step_vis/val_0_step_006000.png b/output/lora_rank_128_mlp_H200/step_vis/val_0_step_006000.png new file mode 100644 index 0000000000000000000000000000000000000000..a740a47fb98af44c5894107bcbb8327003b4f2f2 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/val_0_step_006000.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:538e3bc07bb722930d1c2e36189e7c61fb72b5521b26f475d965a5cfd5203772 +size 1659262 diff --git a/output/lora_rank_128_mlp_H200/step_vis/val_0_step_006000_sample00.png b/output/lora_rank_128_mlp_H200/step_vis/val_0_step_006000_sample00.png new file mode 100644 index 0000000000000000000000000000000000000000..d47bd3ad6513c5002bef8685cf933824673fcc2b --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/val_0_step_006000_sample00.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0ddfe85b025e97a898dc19d8df54a78669f02dd73b307f3c04ebfd4fea0de22f +size 1876400 diff --git a/output/lora_rank_128_mlp_H200/step_vis/val_0_step_006000_sample01.png b/output/lora_rank_128_mlp_H200/step_vis/val_0_step_006000_sample01.png new file mode 100644 index 0000000000000000000000000000000000000000..e38adff9342d8a39fc29459e3158f824ed757025 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/val_0_step_006000_sample01.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:90fc9ccef72e81010172eafaa99a04ca8f56a2069f41fd52e85985245af42f56 +size 1877325 diff --git a/output/lora_rank_128_mlp_H200/step_vis/val_0_step_006000_sample02.png b/output/lora_rank_128_mlp_H200/step_vis/val_0_step_006000_sample02.png new file mode 100644 index 0000000000000000000000000000000000000000..b15b2ccdc0698f4db693fcaf0d1c55d250bcf302 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/val_0_step_006000_sample02.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3c80e5c31585fa1756ee26ec9edc91359449f034788abc084d9b879387fc267d +size 1782853 diff --git a/output/lora_rank_128_mlp_H200/step_vis/val_0_step_006000_sample03.png b/output/lora_rank_128_mlp_H200/step_vis/val_0_step_006000_sample03.png new file mode 100644 index 0000000000000000000000000000000000000000..b198449860c3b24a91e8c04eca15dc6de78d6af2 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/val_0_step_006000_sample03.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ff0fdbd9288d14437c82ae80cf48e7cdcb6acf614c12cfb9e8e692aab7a36d61 +size 1539590 diff --git a/output/lora_rank_128_mlp_H200/step_vis/val_0_step_006000_sample04.png b/output/lora_rank_128_mlp_H200/step_vis/val_0_step_006000_sample04.png new file mode 100644 index 0000000000000000000000000000000000000000..63867759097259bd01a3eacb6cacb7b13350933f --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/val_0_step_006000_sample04.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ca86e6bedc0e8d5844d7eb5a019da217121abc560e64fd7988fa0f6a2112b2d9 +size 1846645 diff --git a/output/lora_rank_128_mlp_H200/step_vis/val_0_step_008000.png b/output/lora_rank_128_mlp_H200/step_vis/val_0_step_008000.png new file mode 100644 index 0000000000000000000000000000000000000000..014b3764687a8e0a5042aec5df9b508f7d0684b0 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/val_0_step_008000.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:82471ee5f62faaa3170d0a408c12c84c0b760c4084572cc38ff202746ddf5e9f +size 1670515 diff --git a/output/lora_rank_128_mlp_H200/step_vis/val_0_step_008000_sample00.png b/output/lora_rank_128_mlp_H200/step_vis/val_0_step_008000_sample00.png new file mode 100644 index 0000000000000000000000000000000000000000..82732dc5ed83d904730fac933d63e81672ef1f16 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/val_0_step_008000_sample00.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:29695f98cd0ca5966bd79ac8fffe9d5f3ab93eed15822102edcaefae3366292e +size 1860018 diff --git a/output/lora_rank_128_mlp_H200/step_vis/val_0_step_008000_sample01.png b/output/lora_rank_128_mlp_H200/step_vis/val_0_step_008000_sample01.png new file mode 100644 index 0000000000000000000000000000000000000000..5ea00428a44fb93ae80dee926c11ea80d15ca0dc --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/val_0_step_008000_sample01.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:de33dbd80e2a99ee73c034ddcecc8c9e222566c9fbc1585001d392df56d2d4ff +size 1782786 diff --git a/output/lora_rank_128_mlp_H200/step_vis/val_0_step_008000_sample02.png b/output/lora_rank_128_mlp_H200/step_vis/val_0_step_008000_sample02.png new file mode 100644 index 0000000000000000000000000000000000000000..7c4de4ebfe0e2399a1e85c2efbd61b6c71bfb051 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/val_0_step_008000_sample02.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c2f24de655a5132f3c400c447d011a3a90f751887c3409d8a5f79571b73caf46 +size 1641036 diff --git a/output/lora_rank_128_mlp_H200/step_vis/val_0_step_008000_sample03.png b/output/lora_rank_128_mlp_H200/step_vis/val_0_step_008000_sample03.png new file mode 100644 index 0000000000000000000000000000000000000000..6ee2d667536bebbe520cdc7b18aae30e4897060b --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/val_0_step_008000_sample03.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:129adbed141a0b1fac59df719207af0f992ece9f93be3285a7c2364471ac0abb +size 1735114 diff --git a/output/lora_rank_128_mlp_H200/step_vis/val_0_step_008000_sample04.png b/output/lora_rank_128_mlp_H200/step_vis/val_0_step_008000_sample04.png new file mode 100644 index 0000000000000000000000000000000000000000..ef1c0dbca37cafb5149423cf9dd1c3621562ec4e --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/val_0_step_008000_sample04.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a0c80974bbdd05aaf6c49256472afea9c7340516f33c7eb0d837e4800c7a8910 +size 1888750 diff --git a/output/lora_rank_128_mlp_H200/step_vis/val_0_step_010000.png b/output/lora_rank_128_mlp_H200/step_vis/val_0_step_010000.png new file mode 100644 index 0000000000000000000000000000000000000000..34b1abf11b4a741d920fe6d76c0d6c38bf64da3a --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/val_0_step_010000.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:13fe83d6ac827d07d074b359e1aff5682662038e865137800cdc0299aad47fe6 +size 1663733 diff --git a/output/lora_rank_128_mlp_H200/step_vis/val_0_step_010000_sample00.png b/output/lora_rank_128_mlp_H200/step_vis/val_0_step_010000_sample00.png new file mode 100644 index 0000000000000000000000000000000000000000..80d0079fc8024b42c929c9d0b37efff85c6ed5f7 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/val_0_step_010000_sample00.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fa9adad1298aeed00c91f219161bcc6c6680741eebb4689f753d4853a2b175ce +size 1842929 diff --git a/output/lora_rank_128_mlp_H200/step_vis/val_0_step_010000_sample01.png b/output/lora_rank_128_mlp_H200/step_vis/val_0_step_010000_sample01.png new file mode 100644 index 0000000000000000000000000000000000000000..8607ac32ccc061b20bbda0ffda66b9d0b58ca647 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/val_0_step_010000_sample01.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:704ab8731642623f8e93193ac8877345351a1defa15fbb77b7e9ea4013e2e7d7 +size 1941614 diff --git a/output/lora_rank_128_mlp_H200/step_vis/val_0_step_010000_sample02.png b/output/lora_rank_128_mlp_H200/step_vis/val_0_step_010000_sample02.png new file mode 100644 index 0000000000000000000000000000000000000000..b7de54cfed49167e65ef428c53274a893e2d1092 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/val_0_step_010000_sample02.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:54db1e65395d95936007da39b1a015b762ef6b4a1f586bf87cdec722da973d94 +size 1684195 diff --git a/output/lora_rank_128_mlp_H200/step_vis/val_0_step_010000_sample03.png b/output/lora_rank_128_mlp_H200/step_vis/val_0_step_010000_sample03.png new file mode 100644 index 0000000000000000000000000000000000000000..694e183d7894dab71eb83429d0a79f8c5e8fd6d2 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/val_0_step_010000_sample03.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:37537f3a6c300cee14ef138445dafd891f47a4f9388374e4c03918392477421a +size 1522479 diff --git a/output/lora_rank_128_mlp_H200/step_vis/val_0_step_010000_sample04.png b/output/lora_rank_128_mlp_H200/step_vis/val_0_step_010000_sample04.png new file mode 100644 index 0000000000000000000000000000000000000000..db9eaa5eff028b7aea0caad8dcea6b78fe8f766f --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/val_0_step_010000_sample04.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:05d749c04bbe884cea7045494f4766aa007185038968f37ba4380311d755f254 +size 1855662 diff --git a/output/lora_rank_128_mlp_H200/step_vis/val_0_step_011999.png b/output/lora_rank_128_mlp_H200/step_vis/val_0_step_011999.png new file mode 100644 index 0000000000000000000000000000000000000000..0b16e34e975ef4c989721f3d7388df3985fac923 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/val_0_step_011999.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d09db6275308344a1d1c34b01655d7933d73b6919d6594530a2f26e11ac53dcb +size 1651325 diff --git a/output/lora_rank_128_mlp_H200/step_vis/val_0_step_011999_sample00.png b/output/lora_rank_128_mlp_H200/step_vis/val_0_step_011999_sample00.png new file mode 100644 index 0000000000000000000000000000000000000000..d4a250d29db73e6e172c91804c631376ddd4b25f --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/val_0_step_011999_sample00.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:56a265d568e2445e93a705e4798449d9cbfc37a1426d11f03a18a1979e86a5b5 +size 1828274 diff --git a/output/lora_rank_128_mlp_H200/step_vis/val_0_step_011999_sample01.png b/output/lora_rank_128_mlp_H200/step_vis/val_0_step_011999_sample01.png new file mode 100644 index 0000000000000000000000000000000000000000..f2adf44b766f1fd194703f611c2ea77b24c72114 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/val_0_step_011999_sample01.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f575e0cdc3b496a520f440860fb1c233a0971d218eadf486ee7cc35ea0feecc8 +size 1882723 diff --git a/output/lora_rank_128_mlp_H200/step_vis/val_0_step_011999_sample02.png b/output/lora_rank_128_mlp_H200/step_vis/val_0_step_011999_sample02.png new file mode 100644 index 0000000000000000000000000000000000000000..a9980bdb430e8ecb37bcadabddb62f60349228bc --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/val_0_step_011999_sample02.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fdbab142005fb280df30a3fcf50ed30153a2a325b7eac481e59845b1dbb235c2 +size 1708456 diff --git a/output/lora_rank_128_mlp_H200/step_vis/val_0_step_011999_sample03.png b/output/lora_rank_128_mlp_H200/step_vis/val_0_step_011999_sample03.png new file mode 100644 index 0000000000000000000000000000000000000000..ea72d1e660a6c4629db295f928dc33ca757372b3 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/val_0_step_011999_sample03.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bb844ec23079e9e4d2f1ea548adecaf025004de8e7fcd41eca3e0842adbc1289 +size 1626118 diff --git a/output/lora_rank_128_mlp_H200/step_vis/val_0_step_011999_sample04.png b/output/lora_rank_128_mlp_H200/step_vis/val_0_step_011999_sample04.png new file mode 100644 index 0000000000000000000000000000000000000000..8fdd36ebdbff109129797614fdfa74df763a175b --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/val_0_step_011999_sample04.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5154c6c550624bbc830919d8908295d4cf807e72ba189bc4d4e8d6e54aadbb27 +size 1787670 diff --git a/output/lora_rank_128_mlp_H200/step_vis/val_0_step_013999.png b/output/lora_rank_128_mlp_H200/step_vis/val_0_step_013999.png new file mode 100644 index 0000000000000000000000000000000000000000..8836f2dcbcd35fa34e612d02cc5968962e59bff1 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/val_0_step_013999.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a984d4355387c2750bf403e928bd088d9ade12f7e0034c6eebc319607addf647 +size 1674674 diff --git a/output/lora_rank_128_mlp_H200/step_vis/val_0_step_013999_sample00.png b/output/lora_rank_128_mlp_H200/step_vis/val_0_step_013999_sample00.png new file mode 100644 index 0000000000000000000000000000000000000000..9b3cba943799914f027289070ad98fb2e6936244 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/val_0_step_013999_sample00.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6887794b3da838fa5d3e95e7802012384af92a370d5b48c3e34f84a30d69d82e +size 1775096 diff --git a/output/lora_rank_128_mlp_H200/step_vis/val_0_step_013999_sample01.png b/output/lora_rank_128_mlp_H200/step_vis/val_0_step_013999_sample01.png new file mode 100644 index 0000000000000000000000000000000000000000..1693abe8f58fb76c2d135f8d27b5ddf7366964ef --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/val_0_step_013999_sample01.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0e079e6d63190924a582b39d1ea2cac595e78ca8c9781f53d94edc42298f2b78 +size 1769389 diff --git a/output/lora_rank_128_mlp_H200/step_vis/val_0_step_013999_sample02.png b/output/lora_rank_128_mlp_H200/step_vis/val_0_step_013999_sample02.png new file mode 100644 index 0000000000000000000000000000000000000000..833305983d4df6d264bbfaf0beb313fe205547d6 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/val_0_step_013999_sample02.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3d82285b1e160c5576f8d6c4500de981dd07c8ef27dccaf06cb9269fc51952c7 +size 1743471 diff --git a/output/lora_rank_128_mlp_H200/step_vis/val_0_step_013999_sample03.png b/output/lora_rank_128_mlp_H200/step_vis/val_0_step_013999_sample03.png new file mode 100644 index 0000000000000000000000000000000000000000..cf69c31ed590ff94e54dfb3f6b5fc9501cc95880 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/val_0_step_013999_sample03.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:65b554af935c714fc96a917c734b13713e2a7f08279e26a55f7aaa76bb9cd099 +size 1764264 diff --git a/output/lora_rank_128_mlp_H200/step_vis/val_0_step_013999_sample04.png b/output/lora_rank_128_mlp_H200/step_vis/val_0_step_013999_sample04.png new file mode 100644 index 0000000000000000000000000000000000000000..80d5ee6b5dbf9bf43cd142a8ee5a0300ed2bb7b8 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/val_0_step_013999_sample04.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6474acb9afa593c158e144d2e651084ca9e38a9309118dada19636ab475cec5f +size 1834888 diff --git a/output/lora_rank_128_mlp_H200/step_vis/val_0_step_015999.png b/output/lora_rank_128_mlp_H200/step_vis/val_0_step_015999.png new file mode 100644 index 0000000000000000000000000000000000000000..5b861185c6436313e7566497ea188d6c7dcbf71e --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/val_0_step_015999.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eb078db88cc4a3b401b39a2f1ac248548fc367104bde15caaa1f6b2a9f066c10 +size 1653828 diff --git a/output/lora_rank_128_mlp_H200/step_vis/val_0_step_015999_sample00.png b/output/lora_rank_128_mlp_H200/step_vis/val_0_step_015999_sample00.png new file mode 100644 index 0000000000000000000000000000000000000000..50fe6b5bdea147e43bb13863951cbd0aa835c885 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/val_0_step_015999_sample00.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:74d9dd7745f4c5838ef02ef00d0a8223d11869963b96d1ffc362001e4e964ebb +size 1795683 diff --git a/output/lora_rank_128_mlp_H200/step_vis/val_0_step_015999_sample01.png b/output/lora_rank_128_mlp_H200/step_vis/val_0_step_015999_sample01.png new file mode 100644 index 0000000000000000000000000000000000000000..b9cb59edf22c96cf953ca610931068791575588a --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/val_0_step_015999_sample01.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:89878d19e1a472dbffd540f35f2b37d6e2f444c6891149c47fe23b89fa4b3ec1 +size 1661158 diff --git a/output/lora_rank_128_mlp_H200/step_vis/val_0_step_015999_sample02.png b/output/lora_rank_128_mlp_H200/step_vis/val_0_step_015999_sample02.png new file mode 100644 index 0000000000000000000000000000000000000000..c5842ddb5b6d13cc012f2ffb127095bbfcfa0c5e --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/val_0_step_015999_sample02.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:31a165280cb2d2760d423da2e1fde2287166269dd5da8683ac142e379ca22bd1 +size 1730387 diff --git a/output/lora_rank_128_mlp_H200/step_vis/val_0_step_015999_sample03.png b/output/lora_rank_128_mlp_H200/step_vis/val_0_step_015999_sample03.png new file mode 100644 index 0000000000000000000000000000000000000000..931cf32445b5135cae44133dd6e3cd1453bbd226 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/val_0_step_015999_sample03.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:53867f77edea82ec2680f141b4dafedc6329e204d8060850a10c6e7e4f939c2e +size 1523065 diff --git a/output/lora_rank_128_mlp_H200/step_vis/val_0_step_015999_sample04.png b/output/lora_rank_128_mlp_H200/step_vis/val_0_step_015999_sample04.png new file mode 100644 index 0000000000000000000000000000000000000000..89e8f8b42d7d74d00667b30facbcb8157e029bfd --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/val_0_step_015999_sample04.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d8ad8d26c00c90bd0eb5f8a210f262932f07bad5a9087bd42036f0d6882ceda1 +size 1818047 diff --git a/output/lora_rank_128_mlp_H200/step_vis/val_0_step_017999.png b/output/lora_rank_128_mlp_H200/step_vis/val_0_step_017999.png new file mode 100644 index 0000000000000000000000000000000000000000..bacacde9c5e4ba9ea45dbfade17da4a32689f38b --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/val_0_step_017999.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2874854ef8a7e00db4b156003ed65b90315840b883ebd2521c8defd5e199fa6c +size 1660356 diff --git a/output/lora_rank_128_mlp_H200/step_vis/val_0_step_017999_sample00.png b/output/lora_rank_128_mlp_H200/step_vis/val_0_step_017999_sample00.png new file mode 100644 index 0000000000000000000000000000000000000000..883c8b0a25dcc5640cdf7433a3cad7f25323d2bd --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/val_0_step_017999_sample00.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:75bb794b6e01ad23540ad1fb0cdcea2297d5eab06d4127df0627869ca8bdc27c +size 1763934 diff --git a/output/lora_rank_128_mlp_H200/step_vis/val_0_step_017999_sample01.png b/output/lora_rank_128_mlp_H200/step_vis/val_0_step_017999_sample01.png new file mode 100644 index 0000000000000000000000000000000000000000..0a8a969cd3aa7ac95e10e18f4ebc5438290f5ee3 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/val_0_step_017999_sample01.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4bfe36b354fd8a60f7b9a4a40cd9efad4dfb9fed6bf814bb7525d852ee3e3365 +size 1869536 diff --git a/output/lora_rank_128_mlp_H200/step_vis/val_0_step_017999_sample02.png b/output/lora_rank_128_mlp_H200/step_vis/val_0_step_017999_sample02.png new file mode 100644 index 0000000000000000000000000000000000000000..56f66bb5952bb162c0462091ca41e05ff35bb2d2 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/val_0_step_017999_sample02.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:24c77c019e0fd6ed742dadc2733c3a51e3983a42d9d6bbbead5e6a6e3096b04b +size 1629340 diff --git a/output/lora_rank_128_mlp_H200/step_vis/val_0_step_017999_sample03.png b/output/lora_rank_128_mlp_H200/step_vis/val_0_step_017999_sample03.png new file mode 100644 index 0000000000000000000000000000000000000000..a4ef7c6f04bdeaa4ba1c52dd6454ff2e24da49ac --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/val_0_step_017999_sample03.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3ef1232b96dd082ac891b46f4ca98bfacaef0aca3eddc8f7a5b51c485a7cedd4 +size 1593568 diff --git a/output/lora_rank_128_mlp_H200/step_vis/val_0_step_017999_sample04.png b/output/lora_rank_128_mlp_H200/step_vis/val_0_step_017999_sample04.png new file mode 100644 index 0000000000000000000000000000000000000000..3983408b403ba9a5cd0d8744682009ce347d898c --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/val_0_step_017999_sample04.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cbf857d3af19d34c90cc78d5544fc8c1bd80e89f6a304426fc90c595c255d244 +size 1831743 diff --git a/output/lora_rank_128_mlp_H200/step_vis/val_0_step_019999.png b/output/lora_rank_128_mlp_H200/step_vis/val_0_step_019999.png new file mode 100644 index 0000000000000000000000000000000000000000..5e9039c27333ca77302eae008ccaf3a98e2872f0 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/val_0_step_019999.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:628a10b84b9abe436286073125ba6784c51ec7da4d4efbb85e3d898226a57ded +size 1658382 diff --git a/output/lora_rank_128_mlp_H200/step_vis/val_0_step_019999_sample00.png b/output/lora_rank_128_mlp_H200/step_vis/val_0_step_019999_sample00.png new file mode 100644 index 0000000000000000000000000000000000000000..1ed97165fbd089b5bf4340117d33da209c3ba9c5 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/val_0_step_019999_sample00.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f948f57fb4a5ab9afd3ceeacf418139fdc10eeebf66929e13950ff394ff8291e +size 1837937 diff --git a/output/lora_rank_128_mlp_H200/step_vis/val_0_step_019999_sample01.png b/output/lora_rank_128_mlp_H200/step_vis/val_0_step_019999_sample01.png new file mode 100644 index 0000000000000000000000000000000000000000..c34ce5952542514d03936e0a2ed49b3295b33414 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/val_0_step_019999_sample01.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d55121b50a41777b4536995bbf5230f763133e37d9c5ca3143964b327a058d1f +size 1859268 diff --git a/output/lora_rank_128_mlp_H200/step_vis/val_0_step_019999_sample02.png b/output/lora_rank_128_mlp_H200/step_vis/val_0_step_019999_sample02.png new file mode 100644 index 0000000000000000000000000000000000000000..6ff9b81f0dcb354c1cbc2323da02365cf35cc345 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/val_0_step_019999_sample02.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:811991e61e663f934bd726f9d4502df8e8c07129c480f5b3d3c46441b57875ab +size 1580460 diff --git a/output/lora_rank_128_mlp_H200/step_vis/val_0_step_019999_sample03.png b/output/lora_rank_128_mlp_H200/step_vis/val_0_step_019999_sample03.png new file mode 100644 index 0000000000000000000000000000000000000000..80592915a8eecfd1bfb493a31b5877a5582b1e70 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/val_0_step_019999_sample03.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8241059bcc6584e318f2b63bf7892faf2313eddd2730702f5ae0fa246a813ab5 +size 1657874 diff --git a/output/lora_rank_128_mlp_H200/step_vis/val_0_step_019999_sample04.png b/output/lora_rank_128_mlp_H200/step_vis/val_0_step_019999_sample04.png new file mode 100644 index 0000000000000000000000000000000000000000..7ea76a450f449a1207c8286c0e8196d088e00470 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/val_0_step_019999_sample04.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5197b0106e1a0d92eb63600aaebea361e4e178ead84c3cce84bf56e88da36d99 +size 1828351 diff --git a/output/lora_rank_128_mlp_H200/step_vis/val_1_step_000000.png b/output/lora_rank_128_mlp_H200/step_vis/val_1_step_000000.png new file mode 100644 index 0000000000000000000000000000000000000000..c120321ce8a5cfc35ada9bb0a4db7af68b16f45c --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/val_1_step_000000.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1432b06ef4d39a457437ca0dc2349eb80dace74b1f58f281a8e96bdcfbed050a +size 1936383 diff --git a/output/lora_rank_128_mlp_H200/step_vis/val_1_step_000000_sample00.png b/output/lora_rank_128_mlp_H200/step_vis/val_1_step_000000_sample00.png new file mode 100644 index 0000000000000000000000000000000000000000..88a621661c4a07974381488aa5078495e5121c4a --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/val_1_step_000000_sample00.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:893eb8fac0ed979767a54541272fe9c746bd4648f499ff38359e53053958699c +size 2709428 diff --git a/output/lora_rank_128_mlp_H200/step_vis/val_1_step_000000_sample01.png b/output/lora_rank_128_mlp_H200/step_vis/val_1_step_000000_sample01.png new file mode 100644 index 0000000000000000000000000000000000000000..79e5d8ffe48a36c1f3c439f816d4cd18ecb3e49e --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/val_1_step_000000_sample01.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fc372eaa2f8707cf50be126754f0abddd3a28aa0a4dc15fb3174ef30702bc27e +size 2690374 diff --git a/output/lora_rank_128_mlp_H200/step_vis/val_1_step_000000_sample02.png b/output/lora_rank_128_mlp_H200/step_vis/val_1_step_000000_sample02.png new file mode 100644 index 0000000000000000000000000000000000000000..fa48dea8a20acb188055fbb4cd83d83d7a8c7503 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/val_1_step_000000_sample02.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c4bec2a758805fdfb41120216d50936b2cfdbc9aa77e94333c1f71d54aaf8988 +size 2719521 diff --git a/output/lora_rank_128_mlp_H200/step_vis/val_1_step_000000_sample03.png b/output/lora_rank_128_mlp_H200/step_vis/val_1_step_000000_sample03.png new file mode 100644 index 0000000000000000000000000000000000000000..c8ea751b0a47ad8941becfa325ffc7b2344d8214 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/val_1_step_000000_sample03.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bfde6e2255af990c079b6bc87c6964ab51cc4d4f920c21bb7b45eb7272dc85d7 +size 2712301 diff --git a/output/lora_rank_128_mlp_H200/step_vis/val_1_step_000000_sample04.png b/output/lora_rank_128_mlp_H200/step_vis/val_1_step_000000_sample04.png new file mode 100644 index 0000000000000000000000000000000000000000..1e0e99dbc72f849cada03df4964bfa0f923f81c7 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/val_1_step_000000_sample04.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fdedbec7d99e4283f26d9a05e8f3a86bf8c464da6721853a1a4940bb69e0be27 +size 2739650 diff --git a/output/lora_rank_128_mlp_H200/step_vis/val_1_step_002000.png b/output/lora_rank_128_mlp_H200/step_vis/val_1_step_002000.png new file mode 100644 index 0000000000000000000000000000000000000000..ea7cb3978d9c594943f57bedb7bb62f7ec4303eb --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/val_1_step_002000.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1989faa5256756cc28a48e04280f2a53663bd24ae86f654ae50b2949647fb85e +size 1702141 diff --git a/output/lora_rank_128_mlp_H200/step_vis/val_1_step_002000_sample00.png b/output/lora_rank_128_mlp_H200/step_vis/val_1_step_002000_sample00.png new file mode 100644 index 0000000000000000000000000000000000000000..8670afeb96398a9ce42a572ae1efe54acb949756 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/val_1_step_002000_sample00.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:67f9522ae43058541620b095a4db2d00d9493d123178ced34388ea91fad3af87 +size 1791393 diff --git a/output/lora_rank_128_mlp_H200/step_vis/val_1_step_002000_sample01.png b/output/lora_rank_128_mlp_H200/step_vis/val_1_step_002000_sample01.png new file mode 100644 index 0000000000000000000000000000000000000000..b845d0513b13e9a326c3ffb52a69a58989626002 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/val_1_step_002000_sample01.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e6a6436859a57df853bc0c7257c16d904b92645a8092418bd0fb63bec97861d9 +size 1983122 diff --git a/output/lora_rank_128_mlp_H200/step_vis/val_1_step_002000_sample02.png b/output/lora_rank_128_mlp_H200/step_vis/val_1_step_002000_sample02.png new file mode 100644 index 0000000000000000000000000000000000000000..17672931303ae898de36e199719a96cd55758485 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/val_1_step_002000_sample02.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a4d3517394742f89df8e507042bf27a5225cbd024c9ccf2890ed447cc8a9d0ac +size 1769672 diff --git a/output/lora_rank_128_mlp_H200/step_vis/val_1_step_002000_sample03.png b/output/lora_rank_128_mlp_H200/step_vis/val_1_step_002000_sample03.png new file mode 100644 index 0000000000000000000000000000000000000000..9d5394f9f86693359783213dd37f883909af984b --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/val_1_step_002000_sample03.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e9116164e6b53ac999e837308e89e6b9ee66b28fd86e6cbc6f11a8624733bfdf +size 1627588 diff --git a/output/lora_rank_128_mlp_H200/step_vis/val_1_step_002000_sample04.png b/output/lora_rank_128_mlp_H200/step_vis/val_1_step_002000_sample04.png new file mode 100644 index 0000000000000000000000000000000000000000..6b95dd6619609359d918029d720f6342117da881 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/val_1_step_002000_sample04.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ceea9a6c216367b417a9ca9db8bb3458e42527c2c2242066b49ae8051a7e5e14 +size 1687962 diff --git a/output/lora_rank_128_mlp_H200/step_vis/val_1_step_004000.png b/output/lora_rank_128_mlp_H200/step_vis/val_1_step_004000.png new file mode 100644 index 0000000000000000000000000000000000000000..b3c02270243b421ea0c6ff48f1114ca6b1c56552 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/val_1_step_004000.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1bd9e0244e617ebe7d597fc8fe178fd4be07257ec333a4bc9ca8536627996eb8 +size 1727152 diff --git a/output/lora_rank_128_mlp_H200/step_vis/val_1_step_004000_sample00.png b/output/lora_rank_128_mlp_H200/step_vis/val_1_step_004000_sample00.png new file mode 100644 index 0000000000000000000000000000000000000000..f3c9d095cd95c8300e8a716463f68e8a97830549 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/val_1_step_004000_sample00.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8ccaa9c800c3043834a74712d047e2f08250c6cce575712bdd18f4940c4b6d5b +size 1820565 diff --git a/output/lora_rank_128_mlp_H200/step_vis/val_1_step_004000_sample01.png b/output/lora_rank_128_mlp_H200/step_vis/val_1_step_004000_sample01.png new file mode 100644 index 0000000000000000000000000000000000000000..20bd07b18c2c9bcb521f2abf3dc84a86961eb128 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/val_1_step_004000_sample01.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8589ce558e1c49843b7f20e1c9395ab1d90fd23312c9b49882326f5c3a5810de +size 1947734 diff --git a/output/lora_rank_128_mlp_H200/step_vis/val_1_step_004000_sample02.png b/output/lora_rank_128_mlp_H200/step_vis/val_1_step_004000_sample02.png new file mode 100644 index 0000000000000000000000000000000000000000..1b81a4838ff5fd8920bae9dc3eb46b62a273f5c6 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/val_1_step_004000_sample02.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1b389c4c75a5e4c708966137df89b31908a64b2d02b54e508bb2d59ef6b721cd +size 1777772 diff --git a/output/lora_rank_128_mlp_H200/step_vis/val_1_step_004000_sample03.png b/output/lora_rank_128_mlp_H200/step_vis/val_1_step_004000_sample03.png new file mode 100644 index 0000000000000000000000000000000000000000..35b6a1f69249bb88dabe107e1d6031e751a8254c --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/val_1_step_004000_sample03.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f340f161c80168345858705f8dba3634674b23ac9fe276fcdd7383f832ee3cc3 +size 1860662 diff --git a/output/lora_rank_128_mlp_H200/step_vis/val_1_step_004000_sample04.png b/output/lora_rank_128_mlp_H200/step_vis/val_1_step_004000_sample04.png new file mode 100644 index 0000000000000000000000000000000000000000..27c0dfabf8fc1b8882bf2b144df430c1b95c1997 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/val_1_step_004000_sample04.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d70e3aee59c105d7e4c0c726be391f18f57c3c22fb929f30314d668302ad27c5 +size 1730832 diff --git a/output/lora_rank_128_mlp_H200/step_vis/val_1_step_006000.png b/output/lora_rank_128_mlp_H200/step_vis/val_1_step_006000.png new file mode 100644 index 0000000000000000000000000000000000000000..ace7ca2687dcc33578294405c08f0176a692454f --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/val_1_step_006000.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:83357aabe4437822130b804237ddc67be9b414e27f562d58e235524188f359a4 +size 1724551 diff --git a/output/lora_rank_128_mlp_H200/step_vis/val_1_step_006000_sample00.png b/output/lora_rank_128_mlp_H200/step_vis/val_1_step_006000_sample00.png new file mode 100644 index 0000000000000000000000000000000000000000..e9c3148fe1c5b6ec3202c114e4a7d8b4a090b022 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/val_1_step_006000_sample00.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a244bdfa2e8be8492673bfce3d7c673bcc25fcbe905701704c9e0e78e47932f8 +size 1894062 diff --git a/output/lora_rank_128_mlp_H200/step_vis/val_1_step_006000_sample01.png b/output/lora_rank_128_mlp_H200/step_vis/val_1_step_006000_sample01.png new file mode 100644 index 0000000000000000000000000000000000000000..627b2365d9f6bb4506d6bf53d7bf3f7e775a5d3c --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/val_1_step_006000_sample01.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6741d2c6e077e8a176ec822565c76d8976ff7052d31d6e5ff38bea8e81f60160 +size 1864450 diff --git a/output/lora_rank_128_mlp_H200/step_vis/val_1_step_006000_sample02.png b/output/lora_rank_128_mlp_H200/step_vis/val_1_step_006000_sample02.png new file mode 100644 index 0000000000000000000000000000000000000000..4022a82a746eab10a6b2f3ea17e75fb36ad7fbdd --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/val_1_step_006000_sample02.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1a8b6d19649f0b1ab38bb844f56d526e281b5cda96bde50d4ee45321ef5adc1b +size 1793850 diff --git a/output/lora_rank_128_mlp_H200/step_vis/val_1_step_006000_sample03.png b/output/lora_rank_128_mlp_H200/step_vis/val_1_step_006000_sample03.png new file mode 100644 index 0000000000000000000000000000000000000000..14792b543fad56e9642d5937fc0e549b66bbe1f7 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/val_1_step_006000_sample03.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9fa6d45df857b54d6507364ad1495419115452786f373e13e7b1246abfd2ed0b +size 1874556 diff --git a/output/lora_rank_128_mlp_H200/step_vis/val_1_step_006000_sample04.png b/output/lora_rank_128_mlp_H200/step_vis/val_1_step_006000_sample04.png new file mode 100644 index 0000000000000000000000000000000000000000..9b87b253d2bb41b315230b509ecb10b7a7506a32 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/val_1_step_006000_sample04.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:98d959166c62d76b3eac7d246d3619dcb9436f6cfb28b4f226a2ed541a1f0a5c +size 1774419 diff --git a/output/lora_rank_128_mlp_H200/step_vis/val_1_step_008000.png b/output/lora_rank_128_mlp_H200/step_vis/val_1_step_008000.png new file mode 100644 index 0000000000000000000000000000000000000000..4f1dc7be506b828a62493fec7d857940aee7059f --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/val_1_step_008000.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dcd3a1dedf1b386620657daa3da35fd8e236392e31ce6f1b80aa7b932a400505 +size 1713426 diff --git a/output/lora_rank_128_mlp_H200/step_vis/val_1_step_008000_sample00.png b/output/lora_rank_128_mlp_H200/step_vis/val_1_step_008000_sample00.png new file mode 100644 index 0000000000000000000000000000000000000000..2434303068b172b3f06c74fca625f998d9d58d49 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/val_1_step_008000_sample00.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:570b51c2e6ee8338e230971c906e0b0c871bd91f4ba14c18d12d46b8d9a8372f +size 1852853 diff --git a/output/lora_rank_128_mlp_H200/step_vis/val_1_step_008000_sample01.png b/output/lora_rank_128_mlp_H200/step_vis/val_1_step_008000_sample01.png new file mode 100644 index 0000000000000000000000000000000000000000..19ae6a2e39bb7c21b684db603b84872ab90980e8 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/val_1_step_008000_sample01.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e988ab2e570a408d1b310eed9e4a6baa675cc7702ccb9bfbec19e70793760e19 +size 1888529 diff --git a/output/lora_rank_128_mlp_H200/step_vis/val_1_step_008000_sample02.png b/output/lora_rank_128_mlp_H200/step_vis/val_1_step_008000_sample02.png new file mode 100644 index 0000000000000000000000000000000000000000..a8c053ad8ce280f2185a60c9584bb419ae0ba45f --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/val_1_step_008000_sample02.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ea9f2eccb3bddbfad2b0dcc799fbb1dbf29eae7095b71ebccc7b57d16f32df75 +size 1750525 diff --git a/output/lora_rank_128_mlp_H200/step_vis/val_1_step_008000_sample03.png b/output/lora_rank_128_mlp_H200/step_vis/val_1_step_008000_sample03.png new file mode 100644 index 0000000000000000000000000000000000000000..6f4d0b51fa19b5d7d522b530808a20c3cf3ead02 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/val_1_step_008000_sample03.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b42189778d5b248ae51f4617c6de50179bda2bf16d8b9250b875d3e38d86b6dc +size 1690394 diff --git a/output/lora_rank_128_mlp_H200/step_vis/val_1_step_008000_sample04.png b/output/lora_rank_128_mlp_H200/step_vis/val_1_step_008000_sample04.png new file mode 100644 index 0000000000000000000000000000000000000000..e4750b4956db487ef5b7a9fcf16b3dac0170f53f --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/val_1_step_008000_sample04.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8703ff920046dce24bf4effbc6738396c328eff1cb6f07f2f94e8604d9f2694f +size 1707023 diff --git a/output/lora_rank_128_mlp_H200/step_vis/val_1_step_010000.png b/output/lora_rank_128_mlp_H200/step_vis/val_1_step_010000.png new file mode 100644 index 0000000000000000000000000000000000000000..2e8061e15208265902faa5d9ca040069556651e6 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/val_1_step_010000.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1a425fb1c3e99bb152aaaa70e8334c8a50746a568b462483ec92be7467b2126c +size 1696501 diff --git a/output/lora_rank_128_mlp_H200/step_vis/val_1_step_010000_sample00.png b/output/lora_rank_128_mlp_H200/step_vis/val_1_step_010000_sample00.png new file mode 100644 index 0000000000000000000000000000000000000000..5d1b00887fc2fdf18dc5588739ef75fdd8aebb4f --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/val_1_step_010000_sample00.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0979022c2ecf46491b17d3dfef7e81e590a65ba3adbfb558ac71a990b5050442 +size 1777984 diff --git a/output/lora_rank_128_mlp_H200/step_vis/val_1_step_010000_sample01.png b/output/lora_rank_128_mlp_H200/step_vis/val_1_step_010000_sample01.png new file mode 100644 index 0000000000000000000000000000000000000000..e0e17cc73cd77d09527f3387e2577b249d0af46b --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/val_1_step_010000_sample01.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a535267cd793e0fc699113bace2752af2665b4d6a74e1ec13ccb7600272d9cd8 +size 1963210 diff --git a/output/lora_rank_128_mlp_H200/step_vis/val_1_step_010000_sample02.png b/output/lora_rank_128_mlp_H200/step_vis/val_1_step_010000_sample02.png new file mode 100644 index 0000000000000000000000000000000000000000..2bf06db05428fdafcd8e2058841f8884156a32de --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/val_1_step_010000_sample02.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f1a9ace0bdaf90074e3b36c75c6191804e09380d085c22eb3aafa3a292992b71 +size 1679204 diff --git a/output/lora_rank_128_mlp_H200/step_vis/val_1_step_010000_sample03.png b/output/lora_rank_128_mlp_H200/step_vis/val_1_step_010000_sample03.png new file mode 100644 index 0000000000000000000000000000000000000000..7806830b679010cd73c5e9207713008457d60e77 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/val_1_step_010000_sample03.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:de0ed9eb8ceba56725b0ae5c994872db6d1e0d674d690c0968f7f07a1750a436 +size 1650873 diff --git a/output/lora_rank_128_mlp_H200/step_vis/val_1_step_010000_sample04.png b/output/lora_rank_128_mlp_H200/step_vis/val_1_step_010000_sample04.png new file mode 100644 index 0000000000000000000000000000000000000000..ef8fa657a60915a409f582b2506dfe1776fe4b1d --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/val_1_step_010000_sample04.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7e955fbb33b7f3d047ee28045531d447b607fbd84cf23f11af4674291a1607e2 +size 1692255 diff --git a/output/lora_rank_128_mlp_H200/step_vis/val_1_step_011999.png b/output/lora_rank_128_mlp_H200/step_vis/val_1_step_011999.png new file mode 100644 index 0000000000000000000000000000000000000000..5068243b9b310e903d4342952d84ced323b9dac8 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/val_1_step_011999.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:66c923a0e9b0c1afac38893def29ee3623f33f5c02cfa0969cc480ca77df5f4f +size 1691820 diff --git a/output/lora_rank_128_mlp_H200/step_vis/val_1_step_011999_sample00.png b/output/lora_rank_128_mlp_H200/step_vis/val_1_step_011999_sample00.png new file mode 100644 index 0000000000000000000000000000000000000000..ab209e7a78cf89a14c355105b22638d0817924f6 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/val_1_step_011999_sample00.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f401b76910f8fb85844fc643f26c2093fb963f2bcd45f5ab9991dfe80a6ec665 +size 1660865 diff --git a/output/lora_rank_128_mlp_H200/step_vis/val_1_step_011999_sample01.png b/output/lora_rank_128_mlp_H200/step_vis/val_1_step_011999_sample01.png new file mode 100644 index 0000000000000000000000000000000000000000..ebafb2278d18694bee973e5b725e7a9d2eb28008 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/val_1_step_011999_sample01.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:28812ea48ba01669c268a6e7ddf12694b31dc6dea580e194a2e101e8ee4f520f +size 1809937 diff --git a/output/lora_rank_128_mlp_H200/step_vis/val_1_step_011999_sample02.png b/output/lora_rank_128_mlp_H200/step_vis/val_1_step_011999_sample02.png new file mode 100644 index 0000000000000000000000000000000000000000..f35f3d61086333c1d5283a918a14155ee8b9810a --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/val_1_step_011999_sample02.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3a8784f97b28917483b6fe3c26701980d0e1f380176b87555e2e925e40db04eb +size 1802753 diff --git a/output/lora_rank_128_mlp_H200/step_vis/val_1_step_011999_sample03.png b/output/lora_rank_128_mlp_H200/step_vis/val_1_step_011999_sample03.png new file mode 100644 index 0000000000000000000000000000000000000000..561f1ca5bccc588074df076da4162cacc8872562 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/val_1_step_011999_sample03.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e629837226580e058ee831e6e515245c5ac0250114d1761d72e6645a1a1c0d04 +size 1646436 diff --git a/output/lora_rank_128_mlp_H200/step_vis/val_1_step_011999_sample04.png b/output/lora_rank_128_mlp_H200/step_vis/val_1_step_011999_sample04.png new file mode 100644 index 0000000000000000000000000000000000000000..d3667b7a52d1dc5ca92846ee159bc0e26c144293 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/val_1_step_011999_sample04.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d472ac56df41104ec1b083a937705b9a1c5ec3524cdccb2162418e1a7fa64f15 +size 1771536 diff --git a/output/lora_rank_128_mlp_H200/step_vis/val_1_step_013999.png b/output/lora_rank_128_mlp_H200/step_vis/val_1_step_013999.png new file mode 100644 index 0000000000000000000000000000000000000000..149413714f47babf679be3b5367487994fec4020 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/val_1_step_013999.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5ffa08af4b6d3f5e93ccbfc36bc89c3c58e3469742f9f718f919f615874c401d +size 1716387 diff --git a/output/lora_rank_128_mlp_H200/step_vis/val_1_step_013999_sample00.png b/output/lora_rank_128_mlp_H200/step_vis/val_1_step_013999_sample00.png new file mode 100644 index 0000000000000000000000000000000000000000..a58cc3eab1c5b40695edf42e34e735382e893be5 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/val_1_step_013999_sample00.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:53dab7ea7dcc420c9e528e4ee218fdf5af0f0afdb80984abb43f19e3c7bc2f83 +size 1863529 diff --git a/output/lora_rank_128_mlp_H200/step_vis/val_1_step_013999_sample01.png b/output/lora_rank_128_mlp_H200/step_vis/val_1_step_013999_sample01.png new file mode 100644 index 0000000000000000000000000000000000000000..cf49e988d19504a9e74c4e9a6c9bd298a809af73 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/val_1_step_013999_sample01.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a8e1c5535023b3eac6b45090c430ff61b028df3da78c7d929cf773cfe5ad21c6 +size 1928163 diff --git a/output/lora_rank_128_mlp_H200/step_vis/val_1_step_013999_sample02.png b/output/lora_rank_128_mlp_H200/step_vis/val_1_step_013999_sample02.png new file mode 100644 index 0000000000000000000000000000000000000000..4240361531faed9ca6d623e86fd4cf51883dae46 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/val_1_step_013999_sample02.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0fab85c18f1f3ba5c6a68c21713a148b70d99545b137212958c5f9bedf82d687 +size 1681064 diff --git a/output/lora_rank_128_mlp_H200/step_vis/val_1_step_013999_sample03.png b/output/lora_rank_128_mlp_H200/step_vis/val_1_step_013999_sample03.png new file mode 100644 index 0000000000000000000000000000000000000000..a91eac80fea4ba12fa4eedb39938ab46fe83fce6 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/val_1_step_013999_sample03.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5c72e1a9ad7554644b6bc33cdaac508e8d9e09934cbff3beeaa4232f1ebf6bb0 +size 1743356 diff --git a/output/lora_rank_128_mlp_H200/step_vis/val_1_step_013999_sample04.png b/output/lora_rank_128_mlp_H200/step_vis/val_1_step_013999_sample04.png new file mode 100644 index 0000000000000000000000000000000000000000..7334524a6cf6613c1f13e8d481425191b89f586f --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/val_1_step_013999_sample04.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a68011e9d43dbcde2676fd5a26f2d2a84fba44a134148a638b84bcf95cc4ba87 +size 1694445 diff --git a/output/lora_rank_128_mlp_H200/step_vis/val_1_step_015999.png b/output/lora_rank_128_mlp_H200/step_vis/val_1_step_015999.png new file mode 100644 index 0000000000000000000000000000000000000000..c32cda58848ef8450e81eb0afb9f73983eaa5c24 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/val_1_step_015999.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:357957846d74429fd0e6f1474aa93d3d274448fa8ee7d265a6860495740c5263 +size 1702790 diff --git a/output/lora_rank_128_mlp_H200/step_vis/val_1_step_015999_sample00.png b/output/lora_rank_128_mlp_H200/step_vis/val_1_step_015999_sample00.png new file mode 100644 index 0000000000000000000000000000000000000000..c6f0721c128e09afc66cfea982b35b69427a2c3f --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/val_1_step_015999_sample00.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ed0f076a3d64e0f93f32167d365b279371d71e53bef65354e4c4c1117a424cf3 +size 1737143 diff --git a/output/lora_rank_128_mlp_H200/step_vis/val_1_step_015999_sample01.png b/output/lora_rank_128_mlp_H200/step_vis/val_1_step_015999_sample01.png new file mode 100644 index 0000000000000000000000000000000000000000..226ef45395a8a2ea1e45f775cc02417053725909 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/val_1_step_015999_sample01.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d2111ea93ea9157daaf8c27bd30020015f401f4d9eb55f59eef5f7bdc3922127 +size 1791702 diff --git a/output/lora_rank_128_mlp_H200/step_vis/val_1_step_015999_sample02.png b/output/lora_rank_128_mlp_H200/step_vis/val_1_step_015999_sample02.png new file mode 100644 index 0000000000000000000000000000000000000000..b53c1f1fc6ed525a51c0772860ccfd05f6721401 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/val_1_step_015999_sample02.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:63e1c7c9642a7d51453d2d1f6bb7683bee80a10ca9d358b683537a8e06806a14 +size 1743083 diff --git a/output/lora_rank_128_mlp_H200/step_vis/val_1_step_015999_sample03.png b/output/lora_rank_128_mlp_H200/step_vis/val_1_step_015999_sample03.png new file mode 100644 index 0000000000000000000000000000000000000000..a75ad20f86514ba53aae63b9315249b734176c85 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/val_1_step_015999_sample03.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f983963d126ef9e50f79cd932665ff21ead968b74de4292735544f5baf26c5c9 +size 1653981 diff --git a/output/lora_rank_128_mlp_H200/step_vis/val_1_step_015999_sample04.png b/output/lora_rank_128_mlp_H200/step_vis/val_1_step_015999_sample04.png new file mode 100644 index 0000000000000000000000000000000000000000..c9aff6709f9d1420bbe4251f37b155e14150e03b --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/val_1_step_015999_sample04.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4c501b0da240fa0b2ba8711856379538fdaf0cc648bc589754811ae93e61840c +size 1628488 diff --git a/output/lora_rank_128_mlp_H200/step_vis/val_1_step_017999.png b/output/lora_rank_128_mlp_H200/step_vis/val_1_step_017999.png new file mode 100644 index 0000000000000000000000000000000000000000..0415479bb2f1cb1159159f3b7ffae1ed7fbaf2ad --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/val_1_step_017999.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:35bff467ca3b4df019de1bc980e37f108e709596125179061170d8d50065e537 +size 1722254 diff --git a/output/lora_rank_128_mlp_H200/step_vis/val_1_step_017999_sample00.png b/output/lora_rank_128_mlp_H200/step_vis/val_1_step_017999_sample00.png new file mode 100644 index 0000000000000000000000000000000000000000..6ed716a428db64449fc32388bda955704dce6164 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/val_1_step_017999_sample00.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:189c4c42fd2e9135903b594dd7d44a66cac972158624a4a70f5f06659c25d2fd +size 1777389 diff --git a/output/lora_rank_128_mlp_H200/step_vis/val_1_step_017999_sample01.png b/output/lora_rank_128_mlp_H200/step_vis/val_1_step_017999_sample01.png new file mode 100644 index 0000000000000000000000000000000000000000..7aec5c0700c72b8be4fc49da6f3e9d7ea12869d0 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/val_1_step_017999_sample01.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5e038294fbba41011c7ec66df89bb56395c8ddede3481fd5b3f58f5946f8c79a +size 1915379 diff --git a/output/lora_rank_128_mlp_H200/step_vis/val_1_step_017999_sample02.png b/output/lora_rank_128_mlp_H200/step_vis/val_1_step_017999_sample02.png new file mode 100644 index 0000000000000000000000000000000000000000..3c97060d6283f84d563c3c745b47f95b7c9ce6f2 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/val_1_step_017999_sample02.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e966e69a9d9d3ab809138a9e23a42f1e7f623a30d50f17ecb568f36b6e6a764f +size 1812277 diff --git a/output/lora_rank_128_mlp_H200/step_vis/val_1_step_017999_sample03.png b/output/lora_rank_128_mlp_H200/step_vis/val_1_step_017999_sample03.png new file mode 100644 index 0000000000000000000000000000000000000000..9520837763075cb0ec8edfddeedc074b0e621e53 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/val_1_step_017999_sample03.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bfe78141e43ada2141b9a6d84bc4dfd095869f621f7c41df6070a0153bc05c85 +size 1703756 diff --git a/output/lora_rank_128_mlp_H200/step_vis/val_1_step_017999_sample04.png b/output/lora_rank_128_mlp_H200/step_vis/val_1_step_017999_sample04.png new file mode 100644 index 0000000000000000000000000000000000000000..775026bcaf241a45d528bd5ec04ae66b4602027c --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/val_1_step_017999_sample04.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a4ab9aed52dafdae79157a9fc6d8e42af43eb37c50dd19c37e76f87e43c440c8 +size 1879292 diff --git a/output/lora_rank_128_mlp_H200/step_vis/val_1_step_019999.png b/output/lora_rank_128_mlp_H200/step_vis/val_1_step_019999.png new file mode 100644 index 0000000000000000000000000000000000000000..1e4aceb4c708caab7b817279e37639cf8ce14f17 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/val_1_step_019999.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e9a36cb5058659d347d7652fd325d1fc7a85d6970b40b3aaf6d28922c508487d +size 1712061 diff --git a/output/lora_rank_128_mlp_H200/step_vis/val_1_step_019999_sample00.png b/output/lora_rank_128_mlp_H200/step_vis/val_1_step_019999_sample00.png new file mode 100644 index 0000000000000000000000000000000000000000..8a562a522511f21661d9f31d971fc60d971ce498 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/val_1_step_019999_sample00.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8d3ab07b3aed509670c1465a2e28a577dc3dfb4deebbf59d66e93e61ebaa081b +size 1777988 diff --git a/output/lora_rank_128_mlp_H200/step_vis/val_1_step_019999_sample01.png b/output/lora_rank_128_mlp_H200/step_vis/val_1_step_019999_sample01.png new file mode 100644 index 0000000000000000000000000000000000000000..157698c83e106ea373476657849cae3c82b7fd0b --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/val_1_step_019999_sample01.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:477c3ec80ba7a9fc55add5425cdeb3b5871af97eb29bba29c3b20aff77c7ea01 +size 1773371 diff --git a/output/lora_rank_128_mlp_H200/step_vis/val_1_step_019999_sample02.png b/output/lora_rank_128_mlp_H200/step_vis/val_1_step_019999_sample02.png new file mode 100644 index 0000000000000000000000000000000000000000..be18417ad52608c127599e0b3841fcdb0aa5d2e9 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/val_1_step_019999_sample02.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:661f1b4cbdbb15955e1e60df9bd33c2a2195a8c2051df8aa8a2b6a3c64e6294f +size 1751486 diff --git a/output/lora_rank_128_mlp_H200/step_vis/val_1_step_019999_sample03.png b/output/lora_rank_128_mlp_H200/step_vis/val_1_step_019999_sample03.png new file mode 100644 index 0000000000000000000000000000000000000000..2f3a0e0b503f2612a66d64ee80fd44550e84baad --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/val_1_step_019999_sample03.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2e229940e452c305fb4f8fc2216115ddb51b6a22be97a5d95fca4087fa7a31fa +size 1729792 diff --git a/output/lora_rank_128_mlp_H200/step_vis/val_1_step_019999_sample04.png b/output/lora_rank_128_mlp_H200/step_vis/val_1_step_019999_sample04.png new file mode 100644 index 0000000000000000000000000000000000000000..0fad1510da35bf70cfae17f7b86cde3a86248d28 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/step_vis/val_1_step_019999_sample04.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5df6b9c85244ce68be8415ba8ce85840c9b20f0ef7531c6f8b3a4a0562773337 +size 1643804 diff --git a/output/lora_rank_128_mlp_H200/train.log b/output/lora_rank_128_mlp_H200/train.log new file mode 100644 index 0000000000000000000000000000000000000000..d00be52c4fd310c497025c65cc7ac9a37bf5fe99 --- /dev/null +++ b/output/lora_rank_128_mlp_H200/train.log @@ -0,0 +1,5983 @@ +Run dir : output/lora_rank_128_mlp_H200 +Log file: output/lora_rank_128_mlp_H200/train.log +GPU: NVIDIA H200 NVL | VRAM: 139.8 GiB | PyTorch: 2.11.0+cu130 +wandb: [wandb.login()] Loaded credentials for https://api.wandb.ai from WANDB_API_KEY. +wandb: Currently logged in as: hkujasonjiang (hku-xg-boost) to https://api.wandb.ai. Use `wandb login --relogin` to force relogin +wandb: Waiting for wandb.init()... + m wandb: Waiting for wandb.init()... + m wandb: setting up run gv4s3qq7 (0.3s) + m wandb: setting up run gv4s3qq7 (0.3s) + m wandb: Tracking run with wandb version 0.26.0 +wandb: Run data is saved locally in /home/xg_wang_group/SynthUrbanSAT/wandb/run-20260416_164244-gv4s3qq7 +wandb: Run `wandb offline` to turn off syncing. +wandb: Syncing run lora_rank_128_mlp_H200-train-20260416-164244 +wandb: View project at https://wandb.ai/hku-xg-boost/lora_rank_128_mlp_H200 +wandb: View run at https://wandb.ai/hku-xg-boost/lora_rank_128_mlp_H200/runs/gv4s3qq7 + +Final Configuration: + Paths: + transformer_path weights/flux2_dev_fp8mixed.safetensors + vae_path weights/flux2-vae.safetensors + controlnet_path weights/FLUX.2-dev-Fun-Controlnet-Union-2602.safetensors + dataset_dir dataset + color_map_path configs/color_map.json + output_dir output/lora_rank_128_mlp_H200 + text_encoder_path weights/mistral_3_small_flux2_fp8.safetensors + precomputed_embeddings output/text_embeddings_global.pt + Model: + image_size 1024 + num_classes 6 + control_in_dim 3072 + fusion_dim 768 + num_fusion_blocks 3 + num_heads 12 + num_fourier_bands 32 + boundary_threshold 0.1 + Training: + num_epochs 500 + batch_size 10 + adapter_lr 0.003 + lora_lr 0.0001 + weight_decay 0.01 + max_grad_norm 1.0 + grad_accum_steps 4 + guidance_scale 3.5 + num_workers 0 + Text Encoder: + text_seq_len 512 + text_dim 15360 + Logging: + log_interval 10 + save_every_n_epochs 5 + val_every_n_epochs 1 + WandB: + wandb_entity hku-xg-boost + wandb_project lora_rank_128_mlp_H200 + Resume: + resume_from (not set) +[MEM @ pre-flight] RAM: 10.9/188.4 GiB (5.8%) | VRAM: 0.0/139.8 GiB (0.0%) + +Pre-flight checks... + ✓ torch + ✓ diffusers + ✓ safetensors + ✓ Pillow + ✓ tifffile + ✓ wandb + ✓ transformers + ✓ psutil + ✓ GPU: NVIDIA H200 NVL (139.8 GiB VRAM) + ✓ transformer_path: weights/flux2_dev_fp8mixed.safetensors (35.5 GB) + ✓ vae_path: weights/flux2-vae.safetensors (0.3 GB) + ✓ controlnet_path: weights/FLUX.2-dev-Fun-Controlnet-Union-2602.safetensors (8.2 GB) + ✓ text_encoder_path: weights/mistral_3_small_flux2_fp8.safetensors (18.0 GB) + ✓ train/rgb: 400 files + ✓ train/seg: 400 files + ✓ train/depth: 400 files + ✓ val/rgb: 80 files + ✓ val/seg: 80 files + ✓ val/depth: 80 files + ✓ test/rgb: 30 files + ✓ test/seg: 30 files + ✓ test/depth: 30 files + ✓ prompt.json found + +All pre-flight checks passed. + +============================================================ + [1/8] Text Embeddings +============================================================ + Loading cached embedding from output/text_embeddings_global.pt + Loaded global text embedding from output/text_embeddings_global.pt (shape: torch.Size([512, 15360])) + +============================================================ + [2/8] Loading VAE +============================================================ + Done (1.1s), VRAM: 0.16 GiB +[MEM @ after VAE] RAM: 11.3/188.4 GiB (6.0%) | VRAM: 0.2/139.8 GiB (0.1%) + +============================================================ + [3/8] Loading Transformer +============================================================ + Dequantizing FP8 transformer weights... + Dequantized 128 FP8 tensors + Converting ComfyUI → diffusers keys... + Converted: 331 diffusers keys + Loading ControlNet weights... + ControlNet: 76 keys + Creating Flux2ControlTransformer2DModel (control_in_dim=3072)... + Skipped 2 control_img_in keys (dim mismatch): + control_img_in.bias [6144] + control_img_in.weight [6144, 260] + Missing: 2, Unexpected: 0 + Initialized control_img_in.weight [6144, 3072] on cuda + Initialized control_img_in.bias [6144] on cuda + FP8 compression: 203 frozen Linears, 67.9 → 37.9 GiB (saved 30.0 GiB) + Done (30.4s), VRAM: 37.87 GiB + Gradient checkpointing: enabled + Backbone FROZEN: all transformer params set requires_grad=False + Gradients will still propagate to HDC²A via control_context autograd +[MEM @ after Transformer] RAM: 12.2/188.4 GiB (6.5%) | VRAM: 37.9/139.8 GiB (27.1%) + +============================================================ + [4/8] Creating HDC²A Adapter +============================================================ + Fusion mode: MLP (lightweight) +HDC²A: 14.6M params +Control: 0.0M params +Total trainable: 14.6M params + +============================================================ + [4.5/8] Applying LoRA to ControlNet Control Blocks +============================================================ + LoRA rank=128, alpha=128.0, dropout=0 + LoRA control_transformer_blocks.0.attn.to_q [6144→6144] + LoRA control_transformer_blocks.0.attn.to_k [6144→6144] + LoRA control_transformer_blocks.0.attn.to_v [6144→6144] + LoRA control_transformer_blocks.0.attn.add_q_proj [6144→6144] + LoRA control_transformer_blocks.0.attn.add_k_proj [6144→6144] + LoRA control_transformer_blocks.0.attn.add_v_proj [6144→6144] + LoRA control_transformer_blocks.0.attn.to_out.0 [6144→6144] + LoRA control_transformer_blocks.1.attn.to_q [6144→6144] + LoRA control_transformer_blocks.1.attn.to_k [6144→6144] + LoRA control_transformer_blocks.1.attn.to_v [6144→6144] + LoRA control_transformer_blocks.1.attn.add_q_proj [6144→6144] + LoRA control_transformer_blocks.1.attn.add_k_proj [6144→6144] + LoRA control_transformer_blocks.1.attn.add_v_proj [6144→6144] + LoRA control_transformer_blocks.1.attn.to_out.0 [6144→6144] + LoRA control_transformer_blocks.2.attn.to_q [6144→6144] + LoRA control_transformer_blocks.2.attn.to_k [6144→6144] + LoRA control_transformer_blocks.2.attn.to_v [6144→6144] + LoRA control_transformer_blocks.2.attn.add_q_proj [6144→6144] + LoRA control_transformer_blocks.2.attn.add_k_proj [6144→6144] + LoRA control_transformer_blocks.2.attn.add_v_proj [6144→6144] + LoRA control_transformer_blocks.2.attn.to_out.0 [6144→6144] + LoRA control_transformer_blocks.3.attn.to_q [6144→6144] + LoRA control_transformer_blocks.3.attn.to_k [6144→6144] + LoRA control_transformer_blocks.3.attn.to_v [6144→6144] + LoRA control_transformer_blocks.3.attn.to_out.0 [6144→6144] + + LoRA modules injected: 25 + LoRA trainable params: 39.32M + +Parameter Statistics: + HDC²A Adapter: total=14.6M trainable=14.6M + ControlNet (frozen): total=4172.8M LoRA trainable=39.32M + Flux2 backbone: total=0.0M trainable=0.0M ✓ + ────────────────────────────────────────────────── + Total trainable: HDC²A 14.6M + LoRA 39.32M = 53.88M + +============================================================ + [5/8] Building Optimizer +============================================================ + AdamW: adapter_lr=3.00e-03, lora_lr=1.00e-04 + param_group 'adapter': 46 tensors, lr=3.00e-03 + param_group 'lora': 50 tensors, lr=1.00e-04 + Scheduler: 400 warmup steps → cosine over ~5000 steps + [6/8] Resume: skipped (no checkpoint specified) + +============================================================ + [7/8] Forward Sanity Check +============================================================ + [test 1/4] Forward pass (eval mode)... + Output shape: torch.Size([1, 4096, 128]) + Output stats: mean=0.0347, std=0.5078 + VRAM peak (forward): 68.44 GiB + [test 2/4] Loss computation (train mode)... + Loss value: 1.351268 + [test 3/4] Backward pass... + Backward completed. VRAM peak (backward): 46.84 GiB + [test 4/4] Gradient flow check... + HDC²A: 46/46 params have non-zero grad + Control: 25/50 params have non-zero grad + Top grad norms (HDC²A): + semantic_encoder.conv_stem.6.weight: 0.034912 + depth_encoder.conv_stem.6.weight: 0.031250 + semantic_encoder.conv_stem.3.weight: 0.022339 + W_s.weight: 0.021729 + W_d.weight: 0.020142 + Test result: PASSED +[MEM @ after test] RAM: 12.8/188.4 GiB (6.8%) | VRAM: 38.0/139.8 GiB (27.2%) + +============================================================ + [8/8] Loading Data +============================================================ +[Data] Data augmentation: disabled +[Data] Train: 400 samples, batch_size=10 +[Data] Train: using global text embeddings +[Data] Val: 80 samples, batch_size=10 +[Data] Test: 30 samples, batch_size=10 + +====================================================================== +Starting training: 500 epochs × 40 steps = 20000 total steps + batch_size=10, grad_accum=4, world_size=1, effective_bs=40 + adapter_lr=3.00e-03, lora_lr=1.00e-04, weight_decay=0.01 +====================================================================== + [Milestone Vis] steps: [0, 2000, 4000, 6000, 8000, 10000, 11999, 13999, 15999, 17999, 19999] + [Milestone Vis] 10 grids: train_0(5), train_1(5), val_0(5), val_1(5), test_0(5), test_1(5), test_2(5), test_3(5), test_4(5), test_5(5) + +--- Epoch 0/499 (0% done) --- + [MilestoneVis] train_0 step 0 ✓ + [MilestoneGrid] train_0 → output/lora_rank_128_mlp_H200/milestone_vis/milestone_grid_train_0.png + [MilestoneVis] train_1 step 0 ✓ + [MilestoneGrid] train_1 → output/lora_rank_128_mlp_H200/milestone_vis/milestone_grid_train_1.png + [MilestoneVis] val_0 step 0 ✓ + [MilestoneGrid] val_0 → output/lora_rank_128_mlp_H200/milestone_vis/milestone_grid_val_0.png + [MilestoneVis] val_1 step 0 ✓ + [MilestoneGrid] val_1 → output/lora_rank_128_mlp_H200/milestone_vis/milestone_grid_val_1.png + [MilestoneVis] test_0 step 0 ✓ + [MilestoneGrid] test_0 → output/lora_rank_128_mlp_H200/milestone_vis/milestone_grid_test_0.png + [MilestoneVis] test_1 step 0 ✓ + [MilestoneGrid] test_1 → output/lora_rank_128_mlp_H200/milestone_vis/milestone_grid_test_1.png + [MilestoneVis] test_2 step 0 ✓ + [MilestoneGrid] test_2 → output/lora_rank_128_mlp_H200/milestone_vis/milestone_grid_test_2.png + [MilestoneVis] test_3 step 0 ✓ + [MilestoneGrid] test_3 → output/lora_rank_128_mlp_H200/milestone_vis/milestone_grid_test_3.png + [MilestoneVis] test_4 step 0 ✓ + [MilestoneGrid] test_4 → output/lora_rank_128_mlp_H200/milestone_vis/milestone_grid_test_4.png + [MilestoneVis] test_5 step 0 ✓ + [MilestoneGrid] test_5 → output/lora_rank_128_mlp_H200/milestone_vis/milestone_grid_test_5.png + [Epoch 0][10/40] loss=0.890734 avg=0.908653 VRAM=38.9GiB | 0.1% done | ETA(epoch): 2908s + [Epoch 0][20/40] loss=0.854883 avg=0.917621 VRAM=38.8GiB | 0.1% done | ETA(epoch): 1200s + [Epoch 0][30/40] loss=0.989746 avg=0.921089 VRAM=38.9GiB | 0.1% done | ETA(epoch): 477s + [Epoch 0][40/40] loss=0.939454 avg=0.920029 VRAM=38.8GiB | 0.2% done | ETA(epoch): 0s + Train loss: 0.920029 (1659.7s) ETA: 13823min + Val loss: 0.975866 [t_0.0-0.2=1.0875 t_0.2-0.4=1.0566 t_0.4-0.6=0.9131 t_0.6-0.8=0.7573 t_0.8-1.0=1.0595] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0000 (BEST) +[MEM @ epoch 0 end] RAM: 16.4/188.4 GiB (8.7%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 1/499 (0% done) --- + [Epoch 1][10/40] loss=0.928154 avg=0.930068 VRAM=38.9GiB | 0.2% done | ETA(epoch): 689s + [Epoch 1][20/40] loss=0.993727 avg=0.935953 VRAM=38.8GiB | 0.3% done | ETA(epoch): 460s + [Epoch 1][30/40] loss=0.829289 avg=0.924732 VRAM=38.9GiB | 0.4% done | ETA(epoch): 230s + [Epoch 1][40/40] loss=0.956584 avg=0.920791 VRAM=38.8GiB | 0.4% done | ETA(epoch): 0s + Train loss: 0.920791 (920.1s) ETA: 10987min + Val loss: 0.923170 [t_0.0-0.2=1.0750 t_0.2-0.4=1.0123 t_0.4-0.6=0.9004 t_0.6-0.8=0.7625 t_0.8-1.0=0.8626] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0001 (BEST) +[MEM @ epoch 1 end] RAM: 16.5/188.4 GiB (8.7%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 2/499 (0% done) --- + [Epoch 2][10/40] loss=0.890961 avg=0.920893 VRAM=38.9GiB | 0.4% done | ETA(epoch): 691s + [Epoch 2][20/40] loss=0.890272 avg=0.907629 VRAM=38.8GiB | 0.5% done | ETA(epoch): 460s + [Epoch 2][30/40] loss=0.959824 avg=0.911814 VRAM=38.9GiB | 0.5% done | ETA(epoch): 230s + [Epoch 2][40/40] loss=0.920409 avg=0.914042 VRAM=38.8GiB | 0.6% done | ETA(epoch): 0s + Train loss: 0.914042 (920.6s) ETA: 10032min + Val loss: 0.951461 [t_0.0-0.2=1.0809 t_0.2-0.4=1.0121 t_0.4-0.6=0.8885 t_0.6-0.8=0.7802 t_0.8-1.0=0.9305] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0002 +[MEM @ epoch 2 end] RAM: 16.5/188.4 GiB (8.7%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 3/499 (1% done) --- + [Epoch 3][10/40] loss=0.884845 avg=0.909862 VRAM=38.9GiB | 0.7% done | ETA(epoch): 691s + [Epoch 3][20/40] loss=0.837420 avg=0.909675 VRAM=38.8GiB | 0.7% done | ETA(epoch): 460s + [Epoch 3][30/40] loss=0.876854 avg=0.911430 VRAM=38.9GiB | 0.8% done | ETA(epoch): 230s + [Epoch 3][40/40] loss=0.960133 avg=0.912105 VRAM=38.8GiB | 0.8% done | ETA(epoch): 0s + Train loss: 0.912105 (920.6s) ETA: 9546min + Val loss: 0.918525 [t_0.0-0.2=1.0645 t_0.2-0.4=1.0225 t_0.4-0.6=0.8829 t_0.6-0.8=0.7538 t_0.8-1.0=0.9150] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0003 (BEST) + Deleted old checkpoint: checkpoint_epoch_0000 +[MEM @ epoch 3 end] RAM: 16.4/188.4 GiB (8.7%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 4/499 (1% done) --- + [Epoch 4][10/40] loss=0.957803 avg=0.905398 VRAM=38.9GiB | 0.9% done | ETA(epoch): 691s + [Epoch 4][20/40] loss=0.882200 avg=0.903666 VRAM=38.8GiB | 0.9% done | ETA(epoch): 460s + [Epoch 4][30/40] loss=0.801701 avg=0.899288 VRAM=38.9GiB | 0.9% done | ETA(epoch): 230s + [Epoch 4][40/40] loss=0.882583 avg=0.898635 VRAM=38.8GiB | 1.0% done | ETA(epoch): 0s + Train loss: 0.898635 (920.7s) ETA: 9249min + Val loss: 0.915758 [t_0.0-0.2=1.0732 t_0.2-0.4=1.0408 t_0.4-0.6=0.8874 t_0.6-0.8=0.7299 t_0.8-1.0=0.8901] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0004 (BEST) + Deleted old checkpoint: checkpoint_epoch_0001 +[MEM @ epoch 4 end] RAM: 16.3/188.4 GiB (8.7%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 5/499 (1% done) --- + [Epoch 5][10/40] loss=0.943018 avg=0.896513 VRAM=38.9GiB | 1.1% done | ETA(epoch): 690s + [Epoch 5][20/40] loss=1.015816 avg=0.891295 VRAM=38.8GiB | 1.1% done | ETA(epoch): 460s + [Epoch 5][30/40] loss=0.940722 avg=0.892662 VRAM=38.9GiB | 1.1% done | ETA(epoch): 230s + [Epoch 5][40/40] loss=0.881230 avg=0.898074 VRAM=38.8GiB | 1.2% done | ETA(epoch): 0s + Train loss: 0.898074 (920.9s) ETA: 9045min + Val loss: 0.956212 [t_0.0-0.2=1.0694 t_0.2-0.4=1.0172 t_0.4-0.6=0.8754 t_0.6-0.8=0.7637 t_0.8-1.0=0.9351] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0005 + Deleted old checkpoint: checkpoint_epoch_0002 +[MEM @ epoch 5 end] RAM: 16.3/188.4 GiB (8.6%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 6/499 (1% done) --- + [Epoch 6][10/40] loss=0.933636 avg=0.873449 VRAM=38.9GiB | 1.2% done | ETA(epoch): 690s + [Epoch 6][20/40] loss=0.943866 avg=0.884837 VRAM=38.8GiB | 1.3% done | ETA(epoch): 461s + [Epoch 6][30/40] loss=0.850770 avg=0.892642 VRAM=38.9GiB | 1.4% done | ETA(epoch): 230s + [Epoch 6][40/40] loss=0.892912 avg=0.896106 VRAM=38.8GiB | 1.4% done | ETA(epoch): 0s + Train loss: 0.896106 (921.4s) ETA: 8896min + Val loss: 0.904818 [t_0.0-0.2=1.0982 t_0.2-0.4=0.9980 t_0.4-0.6=0.8674 t_0.6-0.8=0.7775 t_0.8-1.0=0.8061] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0006 (BEST) + Deleted old checkpoint: checkpoint_epoch_0003 +[MEM @ epoch 6 end] RAM: 16.3/188.4 GiB (8.6%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 7/499 (1% done) --- + [Epoch 7][10/40] loss=0.988393 avg=0.899541 VRAM=38.9GiB | 1.5% done | ETA(epoch): 691s + [Epoch 7][20/40] loss=0.830736 avg=0.895502 VRAM=38.8GiB | 1.5% done | ETA(epoch): 461s + [Epoch 7][30/40] loss=0.942345 avg=0.891167 VRAM=38.9GiB | 1.6% done | ETA(epoch): 230s + [Epoch 7][40/40] loss=0.857977 avg=0.895105 VRAM=38.8GiB | 1.6% done | ETA(epoch): 0s + Train loss: 0.895105 (921.8s) ETA: 8780min + Val loss: 0.958763 [t_0.0-0.2=1.0815 t_0.2-0.4=1.0039 t_0.4-0.6=0.9010 t_0.6-0.8=0.7713 t_0.8-1.0=0.8723] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0007 + Deleted old checkpoint: checkpoint_epoch_0004 +[MEM @ epoch 7 end] RAM: 16.3/188.4 GiB (8.7%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 8/499 (2% done) --- + [Epoch 8][10/40] loss=0.839380 avg=0.878663 VRAM=38.9GiB | 1.7% done | ETA(epoch): 692s + [Epoch 8][20/40] loss=0.881566 avg=0.887011 VRAM=38.8GiB | 1.7% done | ETA(epoch): 461s + [Epoch 8][30/40] loss=0.909823 avg=0.892887 VRAM=38.9GiB | 1.8% done | ETA(epoch): 230s + [Epoch 8][40/40] loss=0.824852 avg=0.886953 VRAM=38.8GiB | 1.8% done | ETA(epoch): 0s + Train loss: 0.886953 (921.7s) ETA: 8686min + Val loss: 0.890784 [t_0.0-0.2=1.0727 t_0.2-0.4=0.9982 t_0.4-0.6=0.8440 t_0.6-0.8=0.7520 t_0.8-1.0=0.8258] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0008 (BEST) + Deleted old checkpoint: checkpoint_epoch_0005 +[MEM @ epoch 8 end] RAM: 16.3/188.4 GiB (8.7%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 9/499 (2% done) --- + [Epoch 9][10/40] loss=0.848969 avg=0.872170 VRAM=38.9GiB | 1.8% done | ETA(epoch): 691s + [Epoch 9][20/40] loss=0.896346 avg=0.884021 VRAM=38.8GiB | 1.9% done | ETA(epoch): 461s + [Epoch 9][30/40] loss=0.878389 avg=0.888230 VRAM=38.9GiB | 1.9% done | ETA(epoch): 230s + [Epoch 9][40/40] loss=0.822791 avg=0.890860 VRAM=38.8GiB | 2.0% done | ETA(epoch): 0s + Train loss: 0.890860 (921.8s) ETA: 8608min + Val loss: 0.897037 [t_0.0-0.2=1.0857 t_0.2-0.4=1.0236 t_0.4-0.6=0.8556 t_0.6-0.8=0.7264 t_0.8-1.0=0.8368] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0009 + Deleted old checkpoint: checkpoint_epoch_0006 +[MEM @ epoch 9 end] RAM: 16.3/188.4 GiB (8.7%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 10/499 (2% done) --- + [Epoch 10][10/40] loss=0.853941 avg=0.885119 VRAM=38.9GiB | 2.1% done | ETA(epoch): 691s + [Epoch 10][20/40] loss=0.893167 avg=0.884671 VRAM=38.8GiB | 2.1% done | ETA(epoch): 461s + [Epoch 10][30/40] loss=0.971367 avg=0.887568 VRAM=38.9GiB | 2.1% done | ETA(epoch): 230s + [Epoch 10][40/40] loss=0.952549 avg=0.885861 VRAM=38.8GiB | 2.2% done | ETA(epoch): 0s + Train loss: 0.885861 (921.4s) ETA: 8541min + Val loss: 0.908613 [t_0.0-0.2=1.0833 t_0.2-0.4=1.0298 t_0.4-0.6=0.8377 t_0.6-0.8=0.7478 t_0.8-1.0=0.8436] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0010 + Deleted old checkpoint: checkpoint_epoch_0007 +[MEM @ epoch 10 end] RAM: 16.2/188.4 GiB (8.6%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 11/499 (2% done) --- + [Epoch 11][10/40] loss=0.815284 avg=0.858154 VRAM=38.9GiB | 2.2% done | ETA(epoch): 691s + [Epoch 11][20/40] loss=0.882084 avg=0.868762 VRAM=38.8GiB | 2.3% done | ETA(epoch): 461s + [Epoch 11][30/40] loss=0.903010 avg=0.869876 VRAM=38.9GiB | 2.4% done | ETA(epoch): 230s + [Epoch 11][40/40] loss=0.900457 avg=0.879460 VRAM=38.8GiB | 2.4% done | ETA(epoch): 0s + Train loss: 0.879460 (921.1s) ETA: 8482min + Val loss: 0.915210 [t_0.0-0.2=1.0856 t_0.2-0.4=1.0023 t_0.4-0.6=0.8747 t_0.6-0.8=0.7305 t_0.8-1.0=0.7767] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0011 +[MEM @ epoch 11 end] RAM: 16.4/188.4 GiB (8.7%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 12/499 (2% done) --- + [Epoch 12][10/40] loss=0.831823 avg=0.865264 VRAM=38.9GiB | 2.5% done | ETA(epoch): 691s + [Epoch 12][20/40] loss=0.880429 avg=0.886747 VRAM=38.8GiB | 2.5% done | ETA(epoch): 461s + [Epoch 12][30/40] loss=0.786472 avg=0.876976 VRAM=38.9GiB | 2.5% done | ETA(epoch): 230s + [Epoch 12][40/40] loss=0.812758 avg=0.876337 VRAM=38.8GiB | 2.6% done | ETA(epoch): 0s + Train loss: 0.876337 (921.5s) ETA: 8429min + Val loss: 0.916877 [t_0.0-0.2=1.0686 t_0.2-0.4=1.0180 t_0.4-0.6=0.8653 t_0.6-0.8=0.7547 t_0.8-1.0=0.7448] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0012 + Deleted old checkpoint: checkpoint_epoch_0009 +[MEM @ epoch 12 end] RAM: 16.4/188.4 GiB (8.7%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 13/499 (3% done) --- + [Epoch 13][10/40] loss=0.853289 avg=0.876101 VRAM=38.9GiB | 2.6% done | ETA(epoch): 691s + [Epoch 13][20/40] loss=0.915392 avg=0.898999 VRAM=38.8GiB | 2.7% done | ETA(epoch): 461s + [Epoch 13][30/40] loss=0.812220 avg=0.900204 VRAM=38.9GiB | 2.8% done | ETA(epoch): 230s + [Epoch 13][40/40] loss=0.886651 avg=0.890409 VRAM=38.8GiB | 2.8% done | ETA(epoch): 0s + Train loss: 0.890409 (921.5s) ETA: 8383min + Val loss: 0.869036 [t_0.0-0.2=1.0774 t_0.2-0.4=0.9873 t_0.4-0.6=0.8694 t_0.6-0.8=0.7334 t_0.8-1.0=0.7591] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0013 (BEST) + Deleted old checkpoint: checkpoint_epoch_0008 + Deleted old checkpoint: checkpoint_epoch_0010 +[MEM @ epoch 13 end] RAM: 16.6/188.4 GiB (8.8%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 14/499 (3% done) --- + [Epoch 14][10/40] loss=0.871037 avg=0.906732 VRAM=38.9GiB | 2.9% done | ETA(epoch): 691s + [Epoch 14][20/40] loss=0.935609 avg=0.896650 VRAM=38.8GiB | 2.9% done | ETA(epoch): 461s + [Epoch 14][30/40] loss=0.892537 avg=0.899380 VRAM=38.9GiB | 2.9% done | ETA(epoch): 230s + [Epoch 14][40/40] loss=0.831776 avg=0.893725 VRAM=38.8GiB | 3.0% done | ETA(epoch): 0s + Train loss: 0.893725 (921.7s) ETA: 8340min + Val loss: 0.876698 [t_0.0-0.2=1.0852 t_0.2-0.4=0.9762 t_0.4-0.6=0.8591 t_0.6-0.8=0.7527 t_0.8-1.0=0.7697] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0014 + Deleted old checkpoint: checkpoint_epoch_0011 +[MEM @ epoch 14 end] RAM: 16.6/188.4 GiB (8.8%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 15/499 (3% done) --- + [Epoch 15][10/40] loss=0.814928 avg=0.861133 VRAM=38.9GiB | 3.0% done | ETA(epoch): 691s + [Epoch 15][20/40] loss=0.903534 avg=0.879000 VRAM=38.8GiB | 3.1% done | ETA(epoch): 461s + [Epoch 15][30/40] loss=0.924490 avg=0.879613 VRAM=38.9GiB | 3.1% done | ETA(epoch): 230s + [Epoch 15][40/40] loss=0.908748 avg=0.882684 VRAM=38.8GiB | 3.2% done | ETA(epoch): 0s + Train loss: 0.882684 (921.9s) ETA: 8300min + Val loss: 0.910719 [t_0.0-0.2=1.0741 t_0.2-0.4=1.0221 t_0.4-0.6=0.8558 t_0.6-0.8=0.7410 t_0.8-1.0=0.7644] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0015 + Deleted old checkpoint: checkpoint_epoch_0012 +[MEM @ epoch 15 end] RAM: 16.6/188.4 GiB (8.8%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 16/499 (3% done) --- + [Epoch 16][10/40] loss=0.791202 avg=0.881105 VRAM=38.9GiB | 3.2% done | ETA(epoch): 691s + [Epoch 16][20/40] loss=0.857130 avg=0.873586 VRAM=38.8GiB | 3.3% done | ETA(epoch): 461s + [Epoch 16][30/40] loss=0.906291 avg=0.881136 VRAM=38.9GiB | 3.4% done | ETA(epoch): 230s + [Epoch 16][40/40] loss=0.869761 avg=0.882720 VRAM=38.8GiB | 3.4% done | ETA(epoch): 0s + Train loss: 0.882720 (921.8s) ETA: 8263min + Val loss: 0.884087 [t_0.0-0.2=1.0671 t_0.2-0.4=0.9509 t_0.4-0.6=0.8929 t_0.6-0.8=0.7105 t_0.8-1.0=0.8009] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0016 +[MEM @ epoch 16 end] RAM: 16.7/188.4 GiB (8.9%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 17/499 (3% done) --- + [Epoch 17][10/40] loss=0.821671 avg=0.865135 VRAM=38.9GiB | 3.5% done | ETA(epoch): 691s + [Epoch 17][20/40] loss=0.826160 avg=0.867107 VRAM=38.8GiB | 3.5% done | ETA(epoch): 461s + [Epoch 17][30/40] loss=0.800604 avg=0.863109 VRAM=38.9GiB | 3.5% done | ETA(epoch): 230s + [Epoch 17][40/40] loss=0.904247 avg=0.869932 VRAM=38.8GiB | 3.6% done | ETA(epoch): 0s + Train loss: 0.869932 (922.1s) ETA: 8229min + Val loss: 0.878501 [t_0.0-0.2=1.0804 t_0.2-0.4=1.0162 t_0.4-0.6=0.8295 t_0.6-0.8=0.7489 t_0.8-1.0=0.7304] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0017 + Deleted old checkpoint: checkpoint_epoch_0014 +[MEM @ epoch 17 end] RAM: 17.2/188.4 GiB (9.1%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 18/499 (4% done) --- + [Epoch 18][10/40] loss=0.811637 avg=0.861408 VRAM=38.9GiB | 3.6% done | ETA(epoch): 692s + [Epoch 18][20/40] loss=0.858868 avg=0.880910 VRAM=38.8GiB | 3.7% done | ETA(epoch): 461s + [Epoch 18][30/40] loss=0.883815 avg=0.882179 VRAM=38.9GiB | 3.8% done | ETA(epoch): 231s + [Epoch 18][40/40] loss=0.777313 avg=0.879050 VRAM=38.8GiB | 3.8% done | ETA(epoch): 0s + Train loss: 0.879050 (922.3s) ETA: 8197min + Val loss: 0.894722 [t_0.0-0.2=1.0731 t_0.2-0.4=1.0028 t_0.4-0.6=0.8732 t_0.6-0.8=0.7633 t_0.8-1.0=0.7331] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0018 + Deleted old checkpoint: checkpoint_epoch_0015 +[MEM @ epoch 18 end] RAM: 17.2/188.4 GiB (9.1%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 19/499 (4% done) --- + [Epoch 19][10/40] loss=0.926625 avg=0.879443 VRAM=38.9GiB | 3.9% done | ETA(epoch): 691s + [Epoch 19][20/40] loss=0.882620 avg=0.881862 VRAM=38.8GiB | 3.9% done | ETA(epoch): 461s + [Epoch 19][30/40] loss=0.832031 avg=0.879532 VRAM=38.9GiB | 4.0% done | ETA(epoch): 230s + [Epoch 19][40/40] loss=0.916588 avg=0.882775 VRAM=38.8GiB | 4.0% done | ETA(epoch): 0s + Train loss: 0.882775 (921.9s) ETA: 8166min + Val loss: 0.871243 [t_0.0-0.2=1.0802 t_0.2-0.4=1.0212 t_0.4-0.6=0.8757 t_0.6-0.8=0.7343 t_0.8-1.0=0.7493] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0019 + Deleted old checkpoint: checkpoint_epoch_0016 +[MEM @ epoch 19 end] RAM: 17.0/188.4 GiB (9.0%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 20/499 (4% done) --- + [Epoch 20][10/40] loss=0.824668 avg=0.905136 VRAM=38.9GiB | 4.0% done | ETA(epoch): 692s + [Epoch 20][20/40] loss=0.840688 avg=0.880280 VRAM=38.8GiB | 4.1% done | ETA(epoch): 461s + [Epoch 20][30/40] loss=0.951517 avg=0.870563 VRAM=38.9GiB | 4.2% done | ETA(epoch): 231s + [Epoch 20][40/40] loss=0.819339 avg=0.872160 VRAM=38.8GiB | 4.2% done | ETA(epoch): 0s + Train loss: 0.872160 (922.3s) ETA: 8136min + Val loss: 0.884227 [t_0.0-0.2=1.0705 t_0.2-0.4=0.9835 t_0.4-0.6=0.8640 t_0.6-0.8=0.7686 t_0.8-1.0=0.7604] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0020 + Deleted old checkpoint: checkpoint_epoch_0017 +[MEM @ epoch 20 end] RAM: 17.0/188.4 GiB (9.0%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 21/499 (4% done) --- + [Epoch 21][10/40] loss=0.938969 avg=0.884757 VRAM=38.9GiB | 4.2% done | ETA(epoch): 691s + [Epoch 21][20/40] loss=0.931458 avg=0.892281 VRAM=38.8GiB | 4.3% done | ETA(epoch): 461s + [Epoch 21][30/40] loss=0.851724 avg=0.885219 VRAM=38.9GiB | 4.3% done | ETA(epoch): 230s + [Epoch 21][40/40] loss=0.858869 avg=0.885363 VRAM=38.8GiB | 4.4% done | ETA(epoch): 0s + Train loss: 0.885363 (921.8s) ETA: 8108min + Val loss: 0.867559 [t_0.0-0.2=1.0729 t_0.2-0.4=0.9889 t_0.4-0.6=0.8798 t_0.6-0.8=0.7496 t_0.8-1.0=0.7344] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0021 (BEST) + Deleted old checkpoint: checkpoint_epoch_0013 + Deleted old checkpoint: checkpoint_epoch_0018 +[MEM @ epoch 21 end] RAM: 17.1/188.4 GiB (9.1%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 22/499 (4% done) --- + [Epoch 22][10/40] loss=0.818429 avg=0.871452 VRAM=38.9GiB | 4.5% done | ETA(epoch): 692s + [Epoch 22][20/40] loss=0.801426 avg=0.856472 VRAM=38.8GiB | 4.5% done | ETA(epoch): 461s + [Epoch 22][30/40] loss=0.968072 avg=0.862642 VRAM=38.9GiB | 4.5% done | ETA(epoch): 231s + [Epoch 22][40/40] loss=0.826608 avg=0.861994 VRAM=38.8GiB | 4.6% done | ETA(epoch): 0s + Train loss: 0.861994 (922.1s) ETA: 8081min + Val loss: 0.891611 [t_0.0-0.2=1.0690 t_0.2-0.4=0.9978 t_0.4-0.6=0.8953 t_0.6-0.8=0.7381 t_0.8-1.0=0.7606] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0022 + Deleted old checkpoint: checkpoint_epoch_0019 +[MEM @ epoch 22 end] RAM: 17.1/188.4 GiB (9.1%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 23/499 (5% done) --- + [Epoch 23][10/40] loss=0.885390 avg=0.852227 VRAM=38.9GiB | 4.7% done | ETA(epoch): 692s + [Epoch 23][20/40] loss=0.838982 avg=0.860241 VRAM=38.8GiB | 4.7% done | ETA(epoch): 461s + [Epoch 23][30/40] loss=0.892545 avg=0.861660 VRAM=38.9GiB | 4.8% done | ETA(epoch): 231s + [Epoch 23][40/40] loss=0.954344 avg=0.867293 VRAM=38.8GiB | 4.8% done | ETA(epoch): 0s + Train loss: 0.867293 (922.4s) ETA: 8054min + Val loss: 0.876063 [t_0.0-0.2=1.0664 t_0.2-0.4=1.0285 t_0.4-0.6=0.8341 t_0.6-0.8=0.7522 t_0.8-1.0=0.7392] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0023 + Deleted old checkpoint: checkpoint_epoch_0020 +[MEM @ epoch 23 end] RAM: 17.0/188.4 GiB (9.0%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 24/499 (5% done) --- + [Epoch 24][10/40] loss=0.800244 avg=0.867598 VRAM=38.9GiB | 4.9% done | ETA(epoch): 692s + [Epoch 24][20/40] loss=0.954037 avg=0.881620 VRAM=38.8GiB | 4.9% done | ETA(epoch): 461s + [Epoch 24][30/40] loss=0.874359 avg=0.869602 VRAM=38.9GiB | 5.0% done | ETA(epoch): 231s + [Epoch 24][40/40] loss=0.868691 avg=0.865512 VRAM=38.8GiB | 5.0% done | ETA(epoch): 0s + Train loss: 0.865512 (922.0s) ETA: 8029min + Val loss: 0.868717 [t_0.0-0.2=1.0816 t_0.2-0.4=1.0033 t_0.4-0.6=0.8313 t_0.6-0.8=0.7140 t_0.8-1.0=0.7522] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0024 +[MEM @ epoch 24 end] RAM: 17.1/188.4 GiB (9.1%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 25/499 (5% done) --- + [Epoch 25][10/40] loss=0.839608 avg=0.896868 VRAM=38.9GiB | 5.1% done | ETA(epoch): 692s + [Epoch 25][20/40] loss=0.810559 avg=0.882652 VRAM=38.8GiB | 5.1% done | ETA(epoch): 461s + [Epoch 25][30/40] loss=0.892982 avg=0.876385 VRAM=38.9GiB | 5.1% done | ETA(epoch): 231s + [Epoch 25][40/40] loss=0.907686 avg=0.876796 VRAM=38.8GiB | 5.2% done | ETA(epoch): 0s + Train loss: 0.876796 (922.2s) ETA: 8004min + Val loss: 0.880949 [t_0.0-0.2=1.0801 t_0.2-0.4=1.0027 t_0.4-0.6=0.8659 t_0.6-0.8=0.7031 t_0.8-1.0=0.7251] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0025 + Deleted old checkpoint: checkpoint_epoch_0022 +[MEM @ epoch 25 end] RAM: 17.0/188.4 GiB (9.0%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 26/499 (5% done) --- + [Epoch 26][10/40] loss=0.822006 avg=0.885158 VRAM=38.9GiB | 5.2% done | ETA(epoch): 691s + [Epoch 26][20/40] loss=0.975421 avg=0.885860 VRAM=38.8GiB | 5.3% done | ETA(epoch): 461s + [Epoch 26][30/40] loss=0.831010 avg=0.883746 VRAM=38.9GiB | 5.3% done | ETA(epoch): 231s + [Epoch 26][40/40] loss=0.862121 avg=0.878105 VRAM=38.8GiB | 5.4% done | ETA(epoch): 0s + Train loss: 0.878105 (922.0s) ETA: 7979min + Val loss: 0.870167 [t_0.0-0.2=1.0716 t_0.2-0.4=1.0605 t_0.4-0.6=0.8253 t_0.6-0.8=0.7391 t_0.8-1.0=0.7514] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0026 + Deleted old checkpoint: checkpoint_epoch_0023 +[MEM @ epoch 26 end] RAM: 17.1/188.4 GiB (9.1%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 27/499 (5% done) --- + [Epoch 27][10/40] loss=0.883691 avg=0.880423 VRAM=38.9GiB | 5.5% done | ETA(epoch): 691s + [Epoch 27][20/40] loss=0.860447 avg=0.889180 VRAM=38.8GiB | 5.5% done | ETA(epoch): 461s + [Epoch 27][30/40] loss=0.871281 avg=0.878154 VRAM=38.9GiB | 5.5% done | ETA(epoch): 230s + [Epoch 27][40/40] loss=0.923760 avg=0.875053 VRAM=38.8GiB | 5.6% done | ETA(epoch): 0s + Train loss: 0.875053 (922.2s) ETA: 7956min + Val loss: 0.878145 [t_0.0-0.2=1.0683 t_0.2-0.4=1.0079 t_0.4-0.6=0.8482 t_0.6-0.8=0.7480 t_0.8-1.0=0.7160] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0027 + Deleted old checkpoint: checkpoint_epoch_0024 +[MEM @ epoch 27 end] RAM: 17.0/188.4 GiB (9.0%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 28/499 (6% done) --- + [Epoch 28][10/40] loss=0.889225 avg=0.861086 VRAM=38.9GiB | 5.7% done | ETA(epoch): 691s + [Epoch 28][20/40] loss=0.802044 avg=0.859699 VRAM=38.8GiB | 5.7% done | ETA(epoch): 461s + [Epoch 28][30/40] loss=0.881513 avg=0.858773 VRAM=38.9GiB | 5.8% done | ETA(epoch): 231s + [Epoch 28][40/40] loss=0.927200 avg=0.866155 VRAM=38.8GiB | 5.8% done | ETA(epoch): 0s + Train loss: 0.866155 (922.2s) ETA: 7932min + Val loss: 0.905952 [t_0.0-0.2=1.0828 t_0.2-0.4=0.9919 t_0.4-0.6=0.8501 t_0.6-0.8=0.7425 t_0.8-1.0=0.7038] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0028 + Deleted old checkpoint: checkpoint_epoch_0025 +[MEM @ epoch 28 end] RAM: 17.0/188.4 GiB (9.0%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 29/499 (6% done) --- + [Epoch 29][10/40] loss=0.856090 avg=0.863783 VRAM=38.9GiB | 5.9% done | ETA(epoch): 691s + [Epoch 29][20/40] loss=0.948802 avg=0.879966 VRAM=38.8GiB | 5.9% done | ETA(epoch): 461s + [Epoch 29][30/40] loss=0.901898 avg=0.882367 VRAM=38.9GiB | 5.9% done | ETA(epoch): 230s + [Epoch 29][40/40] loss=0.831447 avg=0.885342 VRAM=38.8GiB | 6.0% done | ETA(epoch): 0s + Train loss: 0.885342 (921.9s) ETA: 7910min + Val loss: 0.888921 [t_0.0-0.2=1.0850 t_0.2-0.4=1.0141 t_0.4-0.6=0.8494 t_0.6-0.8=0.7403 t_0.8-1.0=0.7055] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0029 + Deleted old checkpoint: checkpoint_epoch_0026 +[MEM @ epoch 29 end] RAM: 16.9/188.4 GiB (9.0%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 30/499 (6% done) --- + [Epoch 30][10/40] loss=0.931132 avg=0.866565 VRAM=38.9GiB | 6.0% done | ETA(epoch): 691s + [Epoch 30][20/40] loss=0.864730 avg=0.861789 VRAM=38.8GiB | 6.1% done | ETA(epoch): 461s + [Epoch 30][30/40] loss=0.914534 avg=0.865101 VRAM=38.9GiB | 6.2% done | ETA(epoch): 230s + [Epoch 30][40/40] loss=0.865582 avg=0.862631 VRAM=38.8GiB | 6.2% done | ETA(epoch): 0s + Train loss: 0.862631 (921.8s) ETA: 7887min + Val loss: 0.873849 [t_0.0-0.2=1.0678 t_0.2-0.4=0.9838 t_0.4-0.6=0.8865 t_0.6-0.8=0.7183 t_0.8-1.0=0.7113] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0030 + Deleted old checkpoint: checkpoint_epoch_0027 +[MEM @ epoch 30 end] RAM: 16.9/188.4 GiB (9.0%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 31/499 (6% done) --- + [Epoch 31][10/40] loss=0.797782 avg=0.886708 VRAM=38.9GiB | 6.2% done | ETA(epoch): 692s + [Epoch 31][20/40] loss=0.879655 avg=0.871087 VRAM=38.8GiB | 6.3% done | ETA(epoch): 461s + [Epoch 31][30/40] loss=0.947848 avg=0.870409 VRAM=38.9GiB | 6.3% done | ETA(epoch): 231s + [Epoch 31][40/40] loss=0.899337 avg=0.870366 VRAM=38.8GiB | 6.4% done | ETA(epoch): 0s + Train loss: 0.870366 (922.0s) ETA: 7865min + Val loss: 0.864212 [t_0.0-0.2=1.0804 t_0.2-0.4=1.0031 t_0.4-0.6=0.8592 t_0.6-0.8=0.7264 t_0.8-1.0=0.7340] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0031 (BEST) + Deleted old checkpoint: checkpoint_epoch_0021 + Deleted old checkpoint: checkpoint_epoch_0028 +[MEM @ epoch 31 end] RAM: 17.0/188.4 GiB (9.0%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 32/499 (6% done) --- + [Epoch 32][10/40] loss=0.794120 avg=0.855880 VRAM=38.9GiB | 6.5% done | ETA(epoch): 692s + [Epoch 32][20/40] loss=0.900117 avg=0.874332 VRAM=38.8GiB | 6.5% done | ETA(epoch): 461s + [Epoch 32][30/40] loss=0.821054 avg=0.872348 VRAM=38.9GiB | 6.6% done | ETA(epoch): 231s + [Epoch 32][40/40] loss=0.930103 avg=0.869840 VRAM=38.8GiB | 6.6% done | ETA(epoch): 0s + Train loss: 0.869840 (922.4s) ETA: 7843min + Val loss: 0.863953 [t_0.0-0.2=1.0616 t_0.2-0.4=1.0233 t_0.4-0.6=0.8830 t_0.6-0.8=0.7275 t_0.8-1.0=0.7392] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0032 (BEST) + Deleted old checkpoint: checkpoint_epoch_0029 +[MEM @ epoch 32 end] RAM: 17.0/188.4 GiB (9.0%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 33/499 (7% done) --- + [Epoch 33][10/40] loss=0.998197 avg=0.875222 VRAM=38.9GiB | 6.7% done | ETA(epoch): 692s + [Epoch 33][20/40] loss=0.875551 avg=0.872269 VRAM=38.8GiB | 6.7% done | ETA(epoch): 461s + [Epoch 33][30/40] loss=0.861741 avg=0.873886 VRAM=38.9GiB | 6.8% done | ETA(epoch): 231s + [Epoch 33][40/40] loss=0.888898 avg=0.873470 VRAM=38.8GiB | 6.8% done | ETA(epoch): 0s + Train loss: 0.873470 (922.4s) ETA: 7822min + Val loss: 0.858724 [t_0.0-0.2=1.0629 t_0.2-0.4=0.9997 t_0.4-0.6=0.8407 t_0.6-0.8=0.7513 t_0.8-1.0=0.7462] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0033 (BEST) + Deleted old checkpoint: checkpoint_epoch_0030 +[MEM @ epoch 33 end] RAM: 17.0/188.4 GiB (9.0%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 34/499 (7% done) --- + [Epoch 34][10/40] loss=0.826517 avg=0.905362 VRAM=38.9GiB | 6.9% done | ETA(epoch): 691s + [Epoch 34][20/40] loss=0.874436 avg=0.888044 VRAM=38.8GiB | 6.9% done | ETA(epoch): 461s + [Epoch 34][30/40] loss=0.876727 avg=0.879328 VRAM=38.9GiB | 7.0% done | ETA(epoch): 231s + [Epoch 34][40/40] loss=0.895478 avg=0.882117 VRAM=38.8GiB | 7.0% done | ETA(epoch): 0s + Train loss: 0.882117 (922.5s) ETA: 7801min + Val loss: 0.889033 [t_0.0-0.2=1.0765 t_0.2-0.4=0.9986 t_0.4-0.6=0.8527 t_0.6-0.8=0.7190 t_0.8-1.0=0.7359] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0034 + Deleted old checkpoint: checkpoint_epoch_0031 +[MEM @ epoch 34 end] RAM: 17.0/188.4 GiB (9.0%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 35/499 (7% done) --- + [Epoch 35][10/40] loss=0.873970 avg=0.842988 VRAM=38.9GiB | 7.0% done | ETA(epoch): 692s + [Epoch 35][20/40] loss=0.909860 avg=0.850282 VRAM=38.8GiB | 7.1% done | ETA(epoch): 461s + [Epoch 35][30/40] loss=0.871764 avg=0.866920 VRAM=38.9GiB | 7.1% done | ETA(epoch): 231s + [Epoch 35][40/40] loss=0.854639 avg=0.861740 VRAM=38.8GiB | 7.2% done | ETA(epoch): 0s + Train loss: 0.861740 (922.4s) ETA: 7780min + Val loss: 0.877221 [t_0.0-0.2=1.0774 t_0.2-0.4=0.9963 t_0.4-0.6=0.8498 t_0.6-0.8=0.7250 t_0.8-1.0=0.7007] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0035 + Deleted old checkpoint: checkpoint_epoch_0032 +[MEM @ epoch 35 end] RAM: 17.1/188.4 GiB (9.1%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 36/499 (7% done) --- + [Epoch 36][10/40] loss=0.839046 avg=0.861803 VRAM=38.9GiB | 7.2% done | ETA(epoch): 692s + [Epoch 36][20/40] loss=0.831900 avg=0.869574 VRAM=38.8GiB | 7.3% done | ETA(epoch): 461s + [Epoch 36][30/40] loss=0.850483 avg=0.875603 VRAM=38.9GiB | 7.3% done | ETA(epoch): 231s + [Epoch 36][40/40] loss=0.826426 avg=0.870557 VRAM=38.8GiB | 7.4% done | ETA(epoch): 0s + Train loss: 0.870557 (922.6s) ETA: 7760min + Val loss: 0.888935 [t_0.0-0.2=1.0767 t_0.2-0.4=1.0091 t_0.4-0.6=0.8702 t_0.6-0.8=0.7464 t_0.8-1.0=0.7018] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0036 +[MEM @ epoch 36 end] RAM: 17.3/188.4 GiB (9.2%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 37/499 (7% done) --- + [Epoch 37][10/40] loss=0.940342 avg=0.885184 VRAM=38.9GiB | 7.4% done | ETA(epoch): 691s + [Epoch 37][20/40] loss=0.840504 avg=0.874057 VRAM=38.8GiB | 7.5% done | ETA(epoch): 461s + [Epoch 37][30/40] loss=0.955513 avg=0.878324 VRAM=38.9GiB | 7.5% done | ETA(epoch): 231s + [Epoch 37][40/40] loss=0.827887 avg=0.878172 VRAM=38.8GiB | 7.6% done | ETA(epoch): 0s + Train loss: 0.878172 (922.6s) ETA: 7740min + Val loss: 0.899912 [t_0.0-0.2=1.0818 t_0.2-0.4=0.9995 t_0.4-0.6=0.8451 t_0.6-0.8=0.7562 t_0.8-1.0=0.7201] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0037 + Deleted old checkpoint: checkpoint_epoch_0034 +[MEM @ epoch 37 end] RAM: 17.2/188.4 GiB (9.1%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 38/499 (8% done) --- + [Epoch 38][10/40] loss=0.952596 avg=0.859923 VRAM=38.9GiB | 7.6% done | ETA(epoch): 692s + [Epoch 38][20/40] loss=0.859250 avg=0.869371 VRAM=38.8GiB | 7.7% done | ETA(epoch): 462s + [Epoch 38][30/40] loss=0.864681 avg=0.869972 VRAM=38.9GiB | 7.8% done | ETA(epoch): 231s + [Epoch 38][40/40] loss=0.866133 avg=0.870752 VRAM=38.8GiB | 7.8% done | ETA(epoch): 0s + Train loss: 0.870752 (922.9s) ETA: 7720min + Val loss: 0.848907 [t_0.0-0.2=1.0894 t_0.2-0.4=1.0151 t_0.4-0.6=0.8378 t_0.6-0.8=0.7414 t_0.8-1.0=0.7167] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0038 (BEST) + Deleted old checkpoint: checkpoint_epoch_0033 + Deleted old checkpoint: checkpoint_epoch_0035 +[MEM @ epoch 38 end] RAM: 17.0/188.4 GiB (9.0%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 39/499 (8% done) --- + [Epoch 39][10/40] loss=0.819374 avg=0.856778 VRAM=38.9GiB | 7.8% done | ETA(epoch): 692s + [Epoch 39][20/40] loss=0.852519 avg=0.864317 VRAM=38.8GiB | 7.9% done | ETA(epoch): 462s + [Epoch 39][30/40] loss=0.818205 avg=0.866885 VRAM=38.9GiB | 8.0% done | ETA(epoch): 231s + [Epoch 39][40/40] loss=0.778597 avg=0.864094 VRAM=38.8GiB | 8.0% done | ETA(epoch): 0s + Train loss: 0.864094 (923.0s) ETA: 7700min + Val loss: 0.844874 [t_0.0-0.2=1.0807 t_0.2-0.4=0.9832 t_0.4-0.6=0.8516 t_0.6-0.8=0.7138 t_0.8-1.0=0.6920] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0039 (BEST) + Deleted old checkpoint: checkpoint_epoch_0036 +[MEM @ epoch 39 end] RAM: 17.1/188.4 GiB (9.1%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 40/499 (8% done) --- + [Epoch 40][10/40] loss=0.896627 avg=0.883223 VRAM=38.9GiB | 8.1% done | ETA(epoch): 692s + [Epoch 40][20/40] loss=0.817944 avg=0.868095 VRAM=38.8GiB | 8.1% done | ETA(epoch): 461s + [Epoch 40][30/40] loss=0.869807 avg=0.865204 VRAM=38.9GiB | 8.2% done | ETA(epoch): 231s + [Epoch 40][40/40] loss=0.874562 avg=0.855825 VRAM=38.8GiB | 8.2% done | ETA(epoch): 0s + Train loss: 0.855825 (922.7s) ETA: 7680min + Val loss: 0.880708 [t_0.0-0.2=1.0695 t_0.2-0.4=1.0343 t_0.4-0.6=0.8848 t_0.6-0.8=0.7228 t_0.8-1.0=0.6925] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0040 + Deleted old checkpoint: checkpoint_epoch_0037 +[MEM @ epoch 40 end] RAM: 17.0/188.4 GiB (9.0%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 41/499 (8% done) --- + [Epoch 41][10/40] loss=0.928862 avg=0.864253 VRAM=38.9GiB | 8.2% done | ETA(epoch): 692s + [Epoch 41][20/40] loss=0.856805 avg=0.867313 VRAM=38.8GiB | 8.3% done | ETA(epoch): 461s + [Epoch 41][30/40] loss=0.762415 avg=0.866666 VRAM=38.9GiB | 8.3% done | ETA(epoch): 231s + [Epoch 41][40/40] loss=0.917038 avg=0.866723 VRAM=38.8GiB | 8.4% done | ETA(epoch): 0s + Train loss: 0.866723 (922.5s) ETA: 7661min + Val loss: 0.856284 [t_0.0-0.2=1.0626 t_0.2-0.4=0.9811 t_0.4-0.6=0.8649 t_0.6-0.8=0.7251 t_0.8-1.0=0.6765] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0041 + Deleted old checkpoint: checkpoint_epoch_0038 +[MEM @ epoch 41 end] RAM: 17.0/188.4 GiB (9.0%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 42/499 (8% done) --- + [Epoch 42][10/40] loss=0.778542 avg=0.852075 VRAM=38.9GiB | 8.5% done | ETA(epoch): 692s + [Epoch 42][20/40] loss=0.877528 avg=0.848610 VRAM=38.8GiB | 8.5% done | ETA(epoch): 461s + [Epoch 42][30/40] loss=0.875283 avg=0.857373 VRAM=38.9GiB | 8.6% done | ETA(epoch): 231s + [Epoch 42][40/40] loss=0.859165 avg=0.862327 VRAM=38.8GiB | 8.6% done | ETA(epoch): 0s + Train loss: 0.862327 (922.4s) ETA: 7641min + Val loss: 0.858252 [t_0.0-0.2=1.0675 t_0.2-0.4=1.0197 t_0.4-0.6=0.8465 t_0.6-0.8=0.7144 t_0.8-1.0=0.6645] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0042 +[MEM @ epoch 42 end] RAM: 17.2/188.4 GiB (9.1%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 43/499 (9% done) --- + [Epoch 43][10/40] loss=0.918483 avg=0.868550 VRAM=38.9GiB | 8.6% done | ETA(epoch): 692s + [Epoch 43][20/40] loss=0.828165 avg=0.851148 VRAM=38.8GiB | 8.7% done | ETA(epoch): 461s + [Epoch 43][30/40] loss=0.901802 avg=0.855209 VRAM=38.9GiB | 8.8% done | ETA(epoch): 230s + [Epoch 43][40/40] loss=0.841502 avg=0.864290 VRAM=38.8GiB | 8.8% done | ETA(epoch): 0s + Train loss: 0.864290 (921.8s) ETA: 7622min + Val loss: 0.868841 [t_0.0-0.2=1.0670 t_0.2-0.4=0.9913 t_0.4-0.6=0.8533 t_0.6-0.8=0.7310 t_0.8-1.0=0.6880] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0043 + Deleted old checkpoint: checkpoint_epoch_0040 +[MEM @ epoch 43 end] RAM: 17.1/188.4 GiB (9.1%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 44/499 (9% done) --- + [Epoch 44][10/40] loss=0.783121 avg=0.858392 VRAM=38.9GiB | 8.8% done | ETA(epoch): 691s + [Epoch 44][20/40] loss=0.846959 avg=0.849261 VRAM=38.8GiB | 8.9% done | ETA(epoch): 461s + [Epoch 44][30/40] loss=0.840710 avg=0.848258 VRAM=38.9GiB | 8.9% done | ETA(epoch): 230s + [Epoch 44][40/40] loss=0.843931 avg=0.848012 VRAM=38.8GiB | 9.0% done | ETA(epoch): 0s + Train loss: 0.848012 (921.6s) ETA: 7603min + Val loss: 0.870997 [t_0.0-0.2=1.0491 t_0.2-0.4=1.0089 t_0.4-0.6=0.8740 t_0.6-0.8=0.7116 t_0.8-1.0=0.6768] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0044 + Deleted old checkpoint: checkpoint_epoch_0041 +[MEM @ epoch 44 end] RAM: 17.0/188.4 GiB (9.0%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 45/499 (9% done) --- + [Epoch 45][10/40] loss=0.807653 avg=0.835095 VRAM=38.9GiB | 9.0% done | ETA(epoch): 691s + [Epoch 45][20/40] loss=0.878456 avg=0.853430 VRAM=38.8GiB | 9.1% done | ETA(epoch): 461s + [Epoch 45][30/40] loss=0.866131 avg=0.860378 VRAM=38.9GiB | 9.2% done | ETA(epoch): 230s + [Epoch 45][40/40] loss=0.891546 avg=0.864162 VRAM=38.8GiB | 9.2% done | ETA(epoch): 0s + Train loss: 0.864162 (921.8s) ETA: 7583min + Val loss: 0.884957 [t_0.0-0.2=1.0664 t_0.2-0.4=1.0237 t_0.4-0.6=0.7991 t_0.6-0.8=0.7479 t_0.8-1.0=0.6893] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0045 + Deleted old checkpoint: checkpoint_epoch_0042 +[MEM @ epoch 45 end] RAM: 17.2/188.4 GiB (9.1%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 46/499 (9% done) --- + [Epoch 46][10/40] loss=0.941427 avg=0.879382 VRAM=38.9GiB | 9.2% done | ETA(epoch): 691s + [Epoch 46][20/40] loss=0.965166 avg=0.891074 VRAM=38.8GiB | 9.3% done | ETA(epoch): 461s + [Epoch 46][30/40] loss=0.902150 avg=0.887721 VRAM=38.9GiB | 9.3% done | ETA(epoch): 230s + [Epoch 46][40/40] loss=0.775639 avg=0.883396 VRAM=38.8GiB | 9.4% done | ETA(epoch): 0s + Train loss: 0.883396 (921.4s) ETA: 7564min + Val loss: 0.860964 [t_0.0-0.2=1.0833 t_0.2-0.4=0.9619 t_0.4-0.6=0.8516 t_0.6-0.8=0.7487 t_0.8-1.0=0.6921] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0046 + Deleted old checkpoint: checkpoint_epoch_0043 +[MEM @ epoch 46 end] RAM: 17.1/188.4 GiB (9.1%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 47/499 (9% done) --- + [Epoch 47][10/40] loss=0.810788 avg=0.844703 VRAM=38.9GiB | 9.4% done | ETA(epoch): 691s + [Epoch 47][20/40] loss=0.760503 avg=0.847735 VRAM=38.8GiB | 9.5% done | ETA(epoch): 461s + [Epoch 47][30/40] loss=0.905706 avg=0.853525 VRAM=38.9GiB | 9.6% done | ETA(epoch): 231s + [Epoch 47][40/40] loss=0.920078 avg=0.862638 VRAM=38.8GiB | 9.6% done | ETA(epoch): 0s + Train loss: 0.862638 (922.2s) ETA: 7545min + Val loss: 0.865990 [t_0.0-0.2=1.0740 t_0.2-0.4=1.0056 t_0.4-0.6=0.8577 t_0.6-0.8=0.7211 t_0.8-1.0=0.6809] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0047 + Deleted old checkpoint: checkpoint_epoch_0044 +[MEM @ epoch 47 end] RAM: 17.0/188.4 GiB (9.0%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 48/499 (10% done) --- + [Epoch 48][10/40] loss=0.874853 avg=0.858937 VRAM=38.9GiB | 9.7% done | ETA(epoch): 691s + [Epoch 48][20/40] loss=0.806827 avg=0.856753 VRAM=38.8GiB | 9.7% done | ETA(epoch): 461s + [Epoch 48][30/40] loss=0.862621 avg=0.860486 VRAM=38.9GiB | 9.8% done | ETA(epoch): 230s + [Epoch 48][40/40] loss=0.932471 avg=0.860873 VRAM=38.8GiB | 9.8% done | ETA(epoch): 0s + Train loss: 0.860873 (922.0s) ETA: 7527min + Val loss: 0.864656 [t_0.0-0.2=1.0710 t_0.2-0.4=0.9993 t_0.4-0.6=0.8403 t_0.6-0.8=0.7506 t_0.8-1.0=0.6696] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0048 + Deleted old checkpoint: checkpoint_epoch_0045 +[MEM @ epoch 48 end] RAM: 17.0/188.4 GiB (9.0%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 49/499 (10% done) --- + [Epoch 49][10/40] loss=0.909884 avg=0.870897 VRAM=38.9GiB | 9.8% done | ETA(epoch): 692s + [Epoch 49][20/40] loss=0.831821 avg=0.860264 VRAM=38.8GiB | 9.9% done | ETA(epoch): 461s + [Epoch 49][30/40] loss=0.804712 avg=0.857804 VRAM=38.9GiB | 10.0% done | ETA(epoch): 231s + [Epoch 49][40/40] loss=0.845985 avg=0.857861 VRAM=38.8GiB | 10.0% done | ETA(epoch): 0s + Train loss: 0.857861 (922.3s) ETA: 7508min + Val loss: 0.876145 [t_0.0-0.2=1.0776 t_0.2-0.4=1.0215 t_0.4-0.6=0.8691 t_0.6-0.8=0.7484 t_0.8-1.0=0.6869] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0049 + Deleted old checkpoint: checkpoint_epoch_0046 +[MEM @ epoch 49 end] RAM: 17.2/188.4 GiB (9.1%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 50/499 (10% done) --- + [MilestoneVis] train_0 step 2000 ✓ + [MilestoneGrid] train_0 → output/lora_rank_128_mlp_H200/milestone_vis/milestone_grid_train_0.png + [MilestoneVis] train_1 step 2000 ✓ + [MilestoneGrid] train_1 → output/lora_rank_128_mlp_H200/milestone_vis/milestone_grid_train_1.png + [MilestoneVis] val_0 step 2000 ✓ + [MilestoneGrid] val_0 → output/lora_rank_128_mlp_H200/milestone_vis/milestone_grid_val_0.png + [MilestoneVis] val_1 step 2000 ✓ + [MilestoneGrid] val_1 → output/lora_rank_128_mlp_H200/milestone_vis/milestone_grid_val_1.png + [MilestoneVis] test_0 step 2000 ✓ + [MilestoneGrid] test_0 → output/lora_rank_128_mlp_H200/milestone_vis/milestone_grid_test_0.png + [MilestoneVis] test_1 step 2000 ✓ + [MilestoneGrid] test_1 → output/lora_rank_128_mlp_H200/milestone_vis/milestone_grid_test_1.png + [MilestoneVis] test_2 step 2000 ✓ + [MilestoneGrid] test_2 → output/lora_rank_128_mlp_H200/milestone_vis/milestone_grid_test_2.png + [MilestoneVis] test_3 step 2000 ✓ + [MilestoneGrid] test_3 → output/lora_rank_128_mlp_H200/milestone_vis/milestone_grid_test_3.png + [MilestoneVis] test_4 step 2000 ✓ + [MilestoneGrid] test_4 → output/lora_rank_128_mlp_H200/milestone_vis/milestone_grid_test_4.png + [MilestoneVis] test_5 step 2000 ✓ + [MilestoneGrid] test_5 → output/lora_rank_128_mlp_H200/milestone_vis/milestone_grid_test_5.png + [Epoch 50][10/40] loss=0.927888 avg=0.888960 VRAM=38.9GiB | 10.1% done | ETA(epoch): 2936s + [Epoch 50][20/40] loss=0.940735 avg=0.892129 VRAM=38.8GiB | 10.1% done | ETA(epoch): 1209s + [Epoch 50][30/40] loss=0.902916 avg=0.889099 VRAM=38.9GiB | 10.2% done | ETA(epoch): 480s + [Epoch 50][40/40] loss=0.880552 avg=0.887615 VRAM=38.8GiB | 10.2% done | ETA(epoch): 0s + Train loss: 0.887615 (1670.7s) ETA: 7599min + Val loss: 0.864311 [t_0.0-0.2=1.0699 t_0.2-0.4=1.0426 t_0.4-0.6=0.8255 t_0.6-0.8=0.7387 t_0.8-1.0=0.6712] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0050 + Deleted old checkpoint: checkpoint_epoch_0047 +[MEM @ epoch 50 end] RAM: 17.7/188.4 GiB (9.4%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 51/499 (10% done) --- + [Epoch 51][10/40] loss=0.962786 avg=0.869604 VRAM=38.9GiB | 10.2% done | ETA(epoch): 691s + [Epoch 51][20/40] loss=0.880361 avg=0.883059 VRAM=38.8GiB | 10.3% done | ETA(epoch): 461s + [Epoch 51][30/40] loss=0.820489 avg=0.877764 VRAM=38.9GiB | 10.3% done | ETA(epoch): 230s + [Epoch 51][40/40] loss=0.876850 avg=0.874866 VRAM=38.8GiB | 10.4% done | ETA(epoch): 0s + Train loss: 0.874866 (921.8s) ETA: 7578min + Val loss: 0.872740 [t_0.0-0.2=1.0557 t_0.2-0.4=0.9971 t_0.4-0.6=0.8459 t_0.6-0.8=0.7095 t_0.8-1.0=0.6834] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0051 + Deleted old checkpoint: checkpoint_epoch_0048 +[MEM @ epoch 51 end] RAM: 17.7/188.4 GiB (9.4%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 52/499 (10% done) --- + [Epoch 52][10/40] loss=0.985829 avg=0.862562 VRAM=38.9GiB | 10.4% done | ETA(epoch): 691s + [Epoch 52][20/40] loss=0.844198 avg=0.875252 VRAM=38.8GiB | 10.5% done | ETA(epoch): 461s + [Epoch 52][30/40] loss=0.937694 avg=0.878312 VRAM=38.9GiB | 10.5% done | ETA(epoch): 230s + [Epoch 52][40/40] loss=0.813982 avg=0.870274 VRAM=38.8GiB | 10.6% done | ETA(epoch): 0s + Train loss: 0.870274 (922.2s) ETA: 7557min + Val loss: 0.858723 [t_0.0-0.2=1.0820 t_0.2-0.4=1.0167 t_0.4-0.6=0.8582 t_0.6-0.8=0.6918 t_0.8-1.0=0.6763] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0052 + Deleted old checkpoint: checkpoint_epoch_0049 +[MEM @ epoch 52 end] RAM: 17.8/188.4 GiB (9.4%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 53/499 (11% done) --- + [Epoch 53][10/40] loss=0.858102 avg=0.858830 VRAM=38.9GiB | 10.7% done | ETA(epoch): 692s + [Epoch 53][20/40] loss=0.929033 avg=0.848431 VRAM=38.8GiB | 10.7% done | ETA(epoch): 461s + [Epoch 53][30/40] loss=0.853477 avg=0.855388 VRAM=38.9GiB | 10.8% done | ETA(epoch): 231s + [Epoch 53][40/40] loss=0.884562 avg=0.863390 VRAM=38.8GiB | 10.8% done | ETA(epoch): 0s + Train loss: 0.863390 (922.7s) ETA: 7537min + Val loss: 0.863062 [t_0.0-0.2=1.0637 t_0.2-0.4=1.0212 t_0.4-0.6=0.8407 t_0.6-0.8=0.7408 t_0.8-1.0=0.6841] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0053 + Deleted old checkpoint: checkpoint_epoch_0050 +[MEM @ epoch 53 end] RAM: 17.6/188.4 GiB (9.4%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 54/499 (11% done) --- + [Epoch 54][10/40] loss=0.853051 avg=0.826696 VRAM=38.9GiB | 10.8% done | ETA(epoch): 692s + [Epoch 54][20/40] loss=0.794739 avg=0.849046 VRAM=38.8GiB | 10.9% done | ETA(epoch): 461s + [Epoch 54][30/40] loss=0.832467 avg=0.854766 VRAM=38.9GiB | 10.9% done | ETA(epoch): 231s + [Epoch 54][40/40] loss=0.952096 avg=0.863152 VRAM=38.8GiB | 11.0% done | ETA(epoch): 0s + Train loss: 0.863152 (922.3s) ETA: 7517min + Val loss: 0.880494 [t_0.0-0.2=1.0530 t_0.2-0.4=1.0151 t_0.4-0.6=0.8654 t_0.6-0.8=0.7151 t_0.8-1.0=0.6894] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0054 + Deleted old checkpoint: checkpoint_epoch_0051 +[MEM @ epoch 54 end] RAM: 17.7/188.4 GiB (9.4%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 55/499 (11% done) --- + [Epoch 55][10/40] loss=0.827635 avg=0.864133 VRAM=38.9GiB | 11.1% done | ETA(epoch): 691s + [Epoch 55][20/40] loss=0.899472 avg=0.864443 VRAM=38.8GiB | 11.1% done | ETA(epoch): 461s + [Epoch 55][30/40] loss=0.793346 avg=0.849642 VRAM=38.9GiB | 11.2% done | ETA(epoch): 231s + [Epoch 55][40/40] loss=0.905831 avg=0.858170 VRAM=38.8GiB | 11.2% done | ETA(epoch): 0s + Train loss: 0.858170 (922.3s) ETA: 7496min + Val loss: 0.882803 [t_0.0-0.2=1.0782 t_0.2-0.4=1.0242 t_0.4-0.6=0.8784 t_0.6-0.8=0.7125 t_0.8-1.0=0.6847] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0055 + Deleted old checkpoint: checkpoint_epoch_0052 +[MEM @ epoch 55 end] RAM: 17.7/188.4 GiB (9.4%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 56/499 (11% done) --- + [Epoch 56][10/40] loss=0.854968 avg=0.878368 VRAM=38.9GiB | 11.2% done | ETA(epoch): 692s + [Epoch 56][20/40] loss=0.853447 avg=0.877571 VRAM=38.8GiB | 11.3% done | ETA(epoch): 461s + [Epoch 56][30/40] loss=0.838321 avg=0.869766 VRAM=38.9GiB | 11.3% done | ETA(epoch): 231s + [Epoch 56][40/40] loss=0.838643 avg=0.870745 VRAM=38.8GiB | 11.4% done | ETA(epoch): 0s + Train loss: 0.870745 (922.5s) ETA: 7476min + Val loss: 0.916993 [t_0.0-0.2=1.0750 t_0.2-0.4=0.9966 t_0.4-0.6=0.8753 t_0.6-0.8=0.7274 t_0.8-1.0=0.6655] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0056 + Deleted old checkpoint: checkpoint_epoch_0053 +[MEM @ epoch 56 end] RAM: 17.6/188.4 GiB (9.4%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 57/499 (11% done) --- + [Epoch 57][10/40] loss=0.832011 avg=0.872417 VRAM=38.9GiB | 11.5% done | ETA(epoch): 692s + [Epoch 57][20/40] loss=0.753717 avg=0.866377 VRAM=38.8GiB | 11.5% done | ETA(epoch): 461s + [Epoch 57][30/40] loss=0.849655 avg=0.860843 VRAM=38.9GiB | 11.6% done | ETA(epoch): 230s + [Epoch 57][40/40] loss=0.851039 avg=0.864597 VRAM=38.8GiB | 11.6% done | ETA(epoch): 0s + Train loss: 0.864597 (921.7s) ETA: 7456min + Val loss: 0.850234 [t_0.0-0.2=1.0726 t_0.2-0.4=0.9766 t_0.4-0.6=0.8500 t_0.6-0.8=0.7461 t_0.8-1.0=0.6703] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0057 + Deleted old checkpoint: checkpoint_epoch_0054 +[MEM @ epoch 57 end] RAM: 17.7/188.4 GiB (9.4%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 58/499 (12% done) --- + [Epoch 58][10/40] loss=0.943924 avg=0.896238 VRAM=38.9GiB | 11.7% done | ETA(epoch): 691s + [Epoch 58][20/40] loss=0.893270 avg=0.864925 VRAM=38.8GiB | 11.7% done | ETA(epoch): 461s + [Epoch 58][30/40] loss=0.874994 avg=0.875950 VRAM=38.9GiB | 11.8% done | ETA(epoch): 230s + [Epoch 58][40/40] loss=0.863305 avg=0.867921 VRAM=38.8GiB | 11.8% done | ETA(epoch): 0s + Train loss: 0.867921 (921.8s) ETA: 7436min + Val loss: 0.882953 [t_0.0-0.2=1.0741 t_0.2-0.4=0.9813 t_0.4-0.6=0.8505 t_0.6-0.8=0.7226 t_0.8-1.0=0.6639] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0058 + Deleted old checkpoint: checkpoint_epoch_0055 +[MEM @ epoch 58 end] RAM: 17.7/188.4 GiB (9.4%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 59/499 (12% done) --- + [Epoch 59][10/40] loss=0.947300 avg=0.869642 VRAM=38.9GiB | 11.8% done | ETA(epoch): 691s + [Epoch 59][20/40] loss=0.879687 avg=0.880008 VRAM=38.8GiB | 11.9% done | ETA(epoch): 461s + [Epoch 59][30/40] loss=0.965547 avg=0.878464 VRAM=38.9GiB | 11.9% done | ETA(epoch): 231s + [Epoch 59][40/40] loss=0.851065 avg=0.876212 VRAM=38.8GiB | 12.0% done | ETA(epoch): 0s + Train loss: 0.876212 (922.3s) ETA: 7416min + Val loss: 0.875124 [t_0.0-0.2=1.0637 t_0.2-0.4=1.0075 t_0.4-0.6=0.8651 t_0.6-0.8=0.7228 t_0.8-1.0=0.6877] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0059 + Deleted old checkpoint: checkpoint_epoch_0056 +[MEM @ epoch 59 end] RAM: 17.7/188.4 GiB (9.4%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 60/499 (12% done) --- + [Epoch 60][10/40] loss=0.828391 avg=0.831956 VRAM=38.9GiB | 12.0% done | ETA(epoch): 692s + [Epoch 60][20/40] loss=0.827194 avg=0.855918 VRAM=38.8GiB | 12.1% done | ETA(epoch): 461s + [Epoch 60][30/40] loss=0.967123 avg=0.863694 VRAM=38.9GiB | 12.2% done | ETA(epoch): 231s + [Epoch 60][40/40] loss=0.879534 avg=0.865165 VRAM=38.8GiB | 12.2% done | ETA(epoch): 0s + Train loss: 0.865165 (922.4s) ETA: 7397min + Val loss: 0.919500 [t_0.0-0.2=1.0766 t_0.2-0.4=1.0142 t_0.4-0.6=0.8640 t_0.6-0.8=0.6986 t_0.8-1.0=0.6729] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0060 + Deleted old checkpoint: checkpoint_epoch_0057 +[MEM @ epoch 60 end] RAM: 17.7/188.4 GiB (9.4%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 61/499 (12% done) --- + [Epoch 61][10/40] loss=0.861573 avg=0.868566 VRAM=38.9GiB | 12.2% done | ETA(epoch): 691s + [Epoch 61][20/40] loss=0.830780 avg=0.876591 VRAM=38.8GiB | 12.3% done | ETA(epoch): 461s + [Epoch 61][30/40] loss=0.895565 avg=0.874619 VRAM=38.9GiB | 12.3% done | ETA(epoch): 230s + [Epoch 61][40/40] loss=0.882785 avg=0.870555 VRAM=38.8GiB | 12.4% done | ETA(epoch): 0s + Train loss: 0.870555 (922.1s) ETA: 7377min + Val loss: 0.844324 [t_0.0-0.2=1.0819 t_0.2-0.4=1.0285 t_0.4-0.6=0.8274 t_0.6-0.8=0.7054 t_0.8-1.0=0.6660] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0061 (BEST) + Deleted old checkpoint: checkpoint_epoch_0039 + Deleted old checkpoint: checkpoint_epoch_0058 +[MEM @ epoch 61 end] RAM: 17.8/188.4 GiB (9.4%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 62/499 (12% done) --- + [Epoch 62][10/40] loss=0.811842 avg=0.884288 VRAM=38.9GiB | 12.4% done | ETA(epoch): 691s + [Epoch 62][20/40] loss=0.826904 avg=0.879912 VRAM=38.8GiB | 12.5% done | ETA(epoch): 461s + [Epoch 62][30/40] loss=0.751204 avg=0.878309 VRAM=38.9GiB | 12.6% done | ETA(epoch): 231s + [Epoch 62][40/40] loss=0.838286 avg=0.879057 VRAM=38.8GiB | 12.6% done | ETA(epoch): 0s + Train loss: 0.879057 (922.2s) ETA: 7358min + Val loss: 0.878958 [t_0.0-0.2=1.0844 t_0.2-0.4=0.9981 t_0.4-0.6=0.8380 t_0.6-0.8=0.7113 t_0.8-1.0=0.6792] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0062 + Deleted old checkpoint: checkpoint_epoch_0059 +[MEM @ epoch 62 end] RAM: 17.8/188.4 GiB (9.5%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 63/499 (13% done) --- + [Epoch 63][10/40] loss=0.855886 avg=0.844622 VRAM=38.9GiB | 12.7% done | ETA(epoch): 692s + [Epoch 63][20/40] loss=0.839380 avg=0.851006 VRAM=38.8GiB | 12.7% done | ETA(epoch): 461s + [Epoch 63][30/40] loss=0.825359 avg=0.858127 VRAM=38.9GiB | 12.8% done | ETA(epoch): 231s + [Epoch 63][40/40] loss=0.881175 avg=0.866573 VRAM=38.8GiB | 12.8% done | ETA(epoch): 0s + Train loss: 0.866573 (922.3s) ETA: 7338min + Val loss: 0.850899 [t_0.0-0.2=1.0699 t_0.2-0.4=0.9744 t_0.4-0.6=0.8486 t_0.6-0.8=0.7224 t_0.8-1.0=0.6641] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0063 + Deleted old checkpoint: checkpoint_epoch_0060 +[MEM @ epoch 63 end] RAM: 17.7/188.4 GiB (9.4%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 64/499 (13% done) --- + [Epoch 64][10/40] loss=0.823734 avg=0.854919 VRAM=38.9GiB | 12.8% done | ETA(epoch): 692s + [Epoch 64][20/40] loss=0.939906 avg=0.854084 VRAM=38.8GiB | 12.9% done | ETA(epoch): 461s + [Epoch 64][30/40] loss=0.891597 avg=0.845548 VRAM=38.9GiB | 13.0% done | ETA(epoch): 231s + [Epoch 64][40/40] loss=0.818679 avg=0.845820 VRAM=38.8GiB | 13.0% done | ETA(epoch): 0s + Train loss: 0.845820 (922.5s) ETA: 7319min + Val loss: 0.871013 [t_0.0-0.2=1.0752 t_0.2-0.4=1.0145 t_0.4-0.6=0.8824 t_0.6-0.8=0.7371 t_0.8-1.0=0.6579] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0064 +[MEM @ epoch 64 end] RAM: 17.8/188.4 GiB (9.5%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 65/499 (13% done) --- + [Epoch 65][10/40] loss=0.855909 avg=0.846061 VRAM=38.9GiB | 13.1% done | ETA(epoch): 692s + [Epoch 65][20/40] loss=0.806302 avg=0.845205 VRAM=38.8GiB | 13.1% done | ETA(epoch): 462s + [Epoch 65][30/40] loss=0.911970 avg=0.858177 VRAM=38.9GiB | 13.2% done | ETA(epoch): 231s + [Epoch 65][40/40] loss=0.800640 avg=0.856641 VRAM=38.8GiB | 13.2% done | ETA(epoch): 0s + Train loss: 0.856641 (923.0s) ETA: 7300min + Val loss: 0.839920 [t_0.0-0.2=1.0817 t_0.2-0.4=1.0221 t_0.4-0.6=0.8525 t_0.6-0.8=0.7216 t_0.8-1.0=0.6561] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0065 (BEST) + Deleted old checkpoint: checkpoint_epoch_0061 + Deleted old checkpoint: checkpoint_epoch_0062 +[MEM @ epoch 65 end] RAM: 17.8/188.4 GiB (9.4%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 66/499 (13% done) --- + [Epoch 66][10/40] loss=0.821242 avg=0.871061 VRAM=38.9GiB | 13.2% done | ETA(epoch): 692s + [Epoch 66][20/40] loss=0.893287 avg=0.871235 VRAM=38.8GiB | 13.3% done | ETA(epoch): 461s + [Epoch 66][30/40] loss=0.883897 avg=0.873776 VRAM=38.9GiB | 13.4% done | ETA(epoch): 231s + [Epoch 66][40/40] loss=0.832141 avg=0.872642 VRAM=38.8GiB | 13.4% done | ETA(epoch): 0s + Train loss: 0.872642 (922.5s) ETA: 7281min + Val loss: 0.854954 [t_0.0-0.2=1.0719 t_0.2-0.4=1.0065 t_0.4-0.6=0.8477 t_0.6-0.8=0.6967 t_0.8-1.0=0.6688] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0066 + Deleted old checkpoint: checkpoint_epoch_0063 +[MEM @ epoch 66 end] RAM: 17.9/188.4 GiB (9.5%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 67/499 (13% done) --- + [Epoch 67][10/40] loss=0.830516 avg=0.849217 VRAM=38.9GiB | 13.5% done | ETA(epoch): 691s + [Epoch 67][20/40] loss=0.922593 avg=0.854151 VRAM=38.8GiB | 13.5% done | ETA(epoch): 461s + [Epoch 67][30/40] loss=0.849196 avg=0.859137 VRAM=38.9GiB | 13.6% done | ETA(epoch): 230s + [Epoch 67][40/40] loss=0.923142 avg=0.864070 VRAM=38.8GiB | 13.6% done | ETA(epoch): 0s + Train loss: 0.864070 (921.9s) ETA: 7262min + Val loss: 0.872010 [t_0.0-0.2=1.0750 t_0.2-0.4=1.0148 t_0.4-0.6=0.8868 t_0.6-0.8=0.7012 t_0.8-1.0=0.6609] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0067 + Deleted old checkpoint: checkpoint_epoch_0064 +[MEM @ epoch 67 end] RAM: 17.7/188.4 GiB (9.4%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 68/499 (14% done) --- + [Epoch 68][10/40] loss=0.775450 avg=0.852957 VRAM=38.9GiB | 13.7% done | ETA(epoch): 691s + [Epoch 68][20/40] loss=0.918475 avg=0.854083 VRAM=38.8GiB | 13.7% done | ETA(epoch): 461s + [Epoch 68][30/40] loss=0.842999 avg=0.852012 VRAM=38.9GiB | 13.8% done | ETA(epoch): 231s + [Epoch 68][40/40] loss=0.813200 avg=0.856756 VRAM=38.8GiB | 13.8% done | ETA(epoch): 0s + Train loss: 0.856756 (922.4s) ETA: 7243min + Val loss: 0.892400 [t_0.0-0.2=1.0667 t_0.2-0.4=1.0217 t_0.4-0.6=0.8439 t_0.6-0.8=0.7164 t_0.8-1.0=0.6814] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0068 +[MEM @ epoch 68 end] RAM: 17.9/188.4 GiB (9.5%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 69/499 (14% done) --- + [Epoch 69][10/40] loss=0.889801 avg=0.866345 VRAM=38.9GiB | 13.9% done | ETA(epoch): 691s + [Epoch 69][20/40] loss=0.836657 avg=0.880192 VRAM=38.8GiB | 13.9% done | ETA(epoch): 461s + [Epoch 69][30/40] loss=0.862514 avg=0.875846 VRAM=38.9GiB | 14.0% done | ETA(epoch): 231s + [Epoch 69][40/40] loss=0.797928 avg=0.874949 VRAM=38.8GiB | 14.0% done | ETA(epoch): 0s + Train loss: 0.874949 (922.1s) ETA: 7224min + Val loss: 0.846343 [t_0.0-0.2=1.0863 t_0.2-0.4=0.9631 t_0.4-0.6=0.8448 t_0.6-0.8=0.7258 t_0.8-1.0=0.6601] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0069 + Deleted old checkpoint: checkpoint_epoch_0066 +[MEM @ epoch 69 end] RAM: 17.8/188.4 GiB (9.4%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 70/499 (14% done) --- + [Epoch 70][10/40] loss=0.817003 avg=0.899003 VRAM=38.9GiB | 14.1% done | ETA(epoch): 691s + [Epoch 70][20/40] loss=0.914205 avg=0.888091 VRAM=38.8GiB | 14.1% done | ETA(epoch): 461s + [Epoch 70][30/40] loss=0.826578 avg=0.882777 VRAM=38.9GiB | 14.1% done | ETA(epoch): 230s + [Epoch 70][40/40] loss=0.832166 avg=0.873909 VRAM=38.8GiB | 14.2% done | ETA(epoch): 0s + Train loss: 0.873909 (921.8s) ETA: 7205min + Val loss: 0.849106 [t_0.0-0.2=1.0775 t_0.2-0.4=0.9999 t_0.4-0.6=0.8258 t_0.6-0.8=0.7302 t_0.8-1.0=0.6714] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0070 + Deleted old checkpoint: checkpoint_epoch_0067 +[MEM @ epoch 70 end] RAM: 17.8/188.4 GiB (9.5%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 71/499 (14% done) --- + [Epoch 71][10/40] loss=0.788497 avg=0.857271 VRAM=38.9GiB | 14.2% done | ETA(epoch): 691s + [Epoch 71][20/40] loss=0.904041 avg=0.868235 VRAM=38.8GiB | 14.3% done | ETA(epoch): 461s + [Epoch 71][30/40] loss=0.913722 avg=0.864531 VRAM=38.9GiB | 14.3% done | ETA(epoch): 230s + [Epoch 71][40/40] loss=0.868497 avg=0.860158 VRAM=38.8GiB | 14.4% done | ETA(epoch): 0s + Train loss: 0.860158 (922.1s) ETA: 7186min + Val loss: 0.859708 [t_0.0-0.2=1.0760 t_0.2-0.4=0.9995 t_0.4-0.6=0.8117 t_0.6-0.8=0.7306 t_0.8-1.0=0.6586] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0071 + Deleted old checkpoint: checkpoint_epoch_0068 +[MEM @ epoch 71 end] RAM: 17.8/188.4 GiB (9.5%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 72/499 (14% done) --- + [Epoch 72][10/40] loss=0.898227 avg=0.857568 VRAM=38.9GiB | 14.4% done | ETA(epoch): 691s + [Epoch 72][20/40] loss=0.790843 avg=0.866944 VRAM=38.8GiB | 14.5% done | ETA(epoch): 461s + [Epoch 72][30/40] loss=0.805716 avg=0.869244 VRAM=38.9GiB | 14.5% done | ETA(epoch): 231s + [Epoch 72][40/40] loss=0.864586 avg=0.870290 VRAM=38.8GiB | 14.6% done | ETA(epoch): 0s + Train loss: 0.870290 (922.5s) ETA: 7168min + Val loss: 0.861323 [t_0.0-0.2=1.0726 t_0.2-0.4=0.9891 t_0.4-0.6=0.8271 t_0.6-0.8=0.7151 t_0.8-1.0=0.6720] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0072 + Deleted old checkpoint: checkpoint_epoch_0069 +[MEM @ epoch 72 end] RAM: 17.7/188.4 GiB (9.4%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 73/499 (15% done) --- + [Epoch 73][10/40] loss=0.836480 avg=0.863524 VRAM=38.9GiB | 14.6% done | ETA(epoch): 692s + [Epoch 73][20/40] loss=0.798080 avg=0.866185 VRAM=38.8GiB | 14.7% done | ETA(epoch): 461s + [Epoch 73][30/40] loss=0.945131 avg=0.866008 VRAM=38.9GiB | 14.8% done | ETA(epoch): 231s + [Epoch 73][40/40] loss=0.764726 avg=0.866275 VRAM=38.8GiB | 14.8% done | ETA(epoch): 0s + Train loss: 0.866275 (922.6s) ETA: 7149min + Val loss: 0.850331 [t_0.0-0.2=1.0800 t_0.2-0.4=0.9990 t_0.4-0.6=0.8771 t_0.6-0.8=0.7055 t_0.8-1.0=0.6642] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0073 + Deleted old checkpoint: checkpoint_epoch_0070 +[MEM @ epoch 73 end] RAM: 17.8/188.4 GiB (9.4%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 74/499 (15% done) --- + [Epoch 74][10/40] loss=0.820143 avg=0.844723 VRAM=38.9GiB | 14.8% done | ETA(epoch): 692s + [Epoch 74][20/40] loss=0.905284 avg=0.854821 VRAM=38.8GiB | 14.9% done | ETA(epoch): 461s + [Epoch 74][30/40] loss=0.913971 avg=0.855644 VRAM=38.9GiB | 14.9% done | ETA(epoch): 231s + [Epoch 74][40/40] loss=0.993087 avg=0.856758 VRAM=38.8GiB | 15.0% done | ETA(epoch): 0s + Train loss: 0.856758 (922.3s) ETA: 7131min + Val loss: 0.848397 [t_0.0-0.2=1.0748 t_0.2-0.4=0.9675 t_0.4-0.6=0.8603 t_0.6-0.8=0.7075 t_0.8-1.0=0.6477] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0074 + Deleted old checkpoint: checkpoint_epoch_0071 +[MEM @ epoch 74 end] RAM: 17.7/188.4 GiB (9.4%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 75/499 (15% done) --- + [Epoch 75][10/40] loss=0.826777 avg=0.840469 VRAM=38.9GiB | 15.0% done | ETA(epoch): 692s + [Epoch 75][20/40] loss=0.912400 avg=0.856858 VRAM=38.8GiB | 15.1% done | ETA(epoch): 461s + [Epoch 75][30/40] loss=0.881094 avg=0.851068 VRAM=38.9GiB | 15.2% done | ETA(epoch): 231s + [Epoch 75][40/40] loss=0.959814 avg=0.855260 VRAM=38.8GiB | 15.2% done | ETA(epoch): 0s + Train loss: 0.855260 (922.6s) ETA: 7112min + Val loss: 0.860136 [t_0.0-0.2=1.0727 t_0.2-0.4=0.9905 t_0.4-0.6=0.8612 t_0.6-0.8=0.7246 t_0.8-1.0=0.6795] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0075 + Deleted old checkpoint: checkpoint_epoch_0072 +[MEM @ epoch 75 end] RAM: 18.0/188.4 GiB (9.6%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 76/499 (15% done) --- + [Epoch 76][10/40] loss=0.887585 avg=0.831959 VRAM=38.9GiB | 15.2% done | ETA(epoch): 692s + [Epoch 76][20/40] loss=0.947539 avg=0.846652 VRAM=38.8GiB | 15.3% done | ETA(epoch): 461s + [Epoch 76][30/40] loss=0.854096 avg=0.856977 VRAM=38.9GiB | 15.3% done | ETA(epoch): 231s + [Epoch 76][40/40] loss=0.851785 avg=0.851642 VRAM=38.8GiB | 15.4% done | ETA(epoch): 0s + Train loss: 0.851642 (922.5s) ETA: 7094min + Val loss: 0.834401 [t_0.0-0.2=1.0789 t_0.2-0.4=1.0002 t_0.4-0.6=0.8504 t_0.6-0.8=0.7056 t_0.8-1.0=0.6688] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0076 (BEST) + Deleted old checkpoint: checkpoint_epoch_0065 + Deleted old checkpoint: checkpoint_epoch_0073 +[MEM @ epoch 76 end] RAM: 17.9/188.4 GiB (9.5%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 77/499 (15% done) --- + [Epoch 77][10/40] loss=0.829685 avg=0.863043 VRAM=38.9GiB | 15.4% done | ETA(epoch): 692s + [Epoch 77][20/40] loss=0.822501 avg=0.841620 VRAM=38.8GiB | 15.5% done | ETA(epoch): 461s + [Epoch 77][30/40] loss=0.946647 avg=0.849549 VRAM=38.9GiB | 15.6% done | ETA(epoch): 231s + [Epoch 77][40/40] loss=0.794340 avg=0.853490 VRAM=38.8GiB | 15.6% done | ETA(epoch): 0s + Train loss: 0.853490 (922.7s) ETA: 7075min + Val loss: 0.888781 [t_0.0-0.2=1.0738 t_0.2-0.4=1.0207 t_0.4-0.6=0.8529 t_0.6-0.8=0.6947 t_0.8-1.0=0.6658] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0077 + Deleted old checkpoint: checkpoint_epoch_0074 +[MEM @ epoch 77 end] RAM: 17.8/188.4 GiB (9.4%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 78/499 (16% done) --- + [Epoch 78][10/40] loss=0.821298 avg=0.873249 VRAM=38.9GiB | 15.7% done | ETA(epoch): 691s + [Epoch 78][20/40] loss=0.960997 avg=0.886773 VRAM=38.8GiB | 15.7% done | ETA(epoch): 461s + [Epoch 78][30/40] loss=0.831465 avg=0.878859 VRAM=38.9GiB | 15.8% done | ETA(epoch): 230s + [Epoch 78][40/40] loss=0.902926 avg=0.879672 VRAM=38.8GiB | 15.8% done | ETA(epoch): 0s + Train loss: 0.879672 (921.7s) ETA: 7057min + Val loss: 0.865802 [t_0.0-0.2=1.0782 t_0.2-0.4=1.0207 t_0.4-0.6=0.8502 t_0.6-0.8=0.7574 t_0.8-1.0=0.7037] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0078 + Deleted old checkpoint: checkpoint_epoch_0075 +[MEM @ epoch 78 end] RAM: 17.8/188.4 GiB (9.4%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 79/499 (16% done) --- + [Epoch 79][10/40] loss=0.859642 avg=0.875033 VRAM=38.9GiB | 15.8% done | ETA(epoch): 691s + [Epoch 79][20/40] loss=0.830667 avg=0.862525 VRAM=38.8GiB | 15.9% done | ETA(epoch): 461s + [Epoch 79][30/40] loss=0.895966 avg=0.861180 VRAM=38.9GiB | 16.0% done | ETA(epoch): 231s + [Epoch 79][40/40] loss=0.754532 avg=0.861657 VRAM=38.8GiB | 16.0% done | ETA(epoch): 0s + Train loss: 0.861657 (922.0s) ETA: 7038min + Val loss: 0.883056 [t_0.0-0.2=1.0776 t_0.2-0.4=0.9656 t_0.4-0.6=0.8728 t_0.6-0.8=0.7318 t_0.8-1.0=0.6855] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0079 +[MEM @ epoch 79 end] RAM: 16.5/188.4 GiB (8.8%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 80/499 (16% done) --- + [Epoch 80][10/40] loss=0.923736 avg=0.853771 VRAM=38.9GiB | 16.1% done | ETA(epoch): 692s + [Epoch 80][20/40] loss=0.809863 avg=0.859998 VRAM=38.8GiB | 16.1% done | ETA(epoch): 461s + [Epoch 80][30/40] loss=0.886301 avg=0.869552 VRAM=38.9GiB | 16.2% done | ETA(epoch): 231s + [Epoch 80][40/40] loss=0.884564 avg=0.866433 VRAM=38.8GiB | 16.2% done | ETA(epoch): 0s + Train loss: 0.866433 (922.4s) ETA: 7020min + Val loss: 0.852909 [t_0.0-0.2=1.0861 t_0.2-0.4=0.9905 t_0.4-0.6=0.8790 t_0.6-0.8=0.7206 t_0.8-1.0=0.6703] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0080 + Deleted old checkpoint: checkpoint_epoch_0077 +[MEM @ epoch 80 end] RAM: 16.1/188.4 GiB (8.5%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 81/499 (16% done) --- + [Epoch 81][10/40] loss=0.987827 avg=0.893397 VRAM=38.9GiB | 16.2% done | ETA(epoch): 690s + [Epoch 81][20/40] loss=0.938329 avg=0.876877 VRAM=38.8GiB | 16.3% done | ETA(epoch): 460s + [Epoch 81][30/40] loss=0.906253 avg=0.884220 VRAM=38.9GiB | 16.4% done | ETA(epoch): 230s + [Epoch 81][40/40] loss=0.973789 avg=0.877544 VRAM=38.8GiB | 16.4% done | ETA(epoch): 0s + Train loss: 0.877544 (921.1s) ETA: 7002min + Val loss: 0.878373 [t_0.0-0.2=1.0824 t_0.2-0.4=0.9813 t_0.4-0.6=0.8744 t_0.6-0.8=0.7199 t_0.8-1.0=0.6711] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0081 + Deleted old checkpoint: checkpoint_epoch_0078 +[MEM @ epoch 81 end] RAM: 15.7/188.4 GiB (8.4%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 82/499 (16% done) --- + [Epoch 82][10/40] loss=0.852492 avg=0.881280 VRAM=38.9GiB | 16.4% done | ETA(epoch): 691s + [Epoch 82][20/40] loss=0.866674 avg=0.865465 VRAM=38.8GiB | 16.5% done | ETA(epoch): 460s + [Epoch 82][30/40] loss=0.866328 avg=0.873085 VRAM=38.9GiB | 16.6% done | ETA(epoch): 230s + [Epoch 82][40/40] loss=0.914845 avg=0.872533 VRAM=38.8GiB | 16.6% done | ETA(epoch): 0s + Train loss: 0.872533 (921.0s) ETA: 6984min + Val loss: 0.877124 [t_0.0-0.2=1.0791 t_0.2-0.4=1.0173 t_0.4-0.6=0.8413 t_0.6-0.8=0.7418 t_0.8-1.0=0.6518] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0082 + Deleted old checkpoint: checkpoint_epoch_0079 +[MEM @ epoch 82 end] RAM: 16.0/188.4 GiB (8.5%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 83/499 (17% done) --- + [Epoch 83][10/40] loss=0.856430 avg=0.880743 VRAM=38.9GiB | 16.7% done | ETA(epoch): 690s + [Epoch 83][20/40] loss=0.923773 avg=0.884446 VRAM=38.8GiB | 16.7% done | ETA(epoch): 460s + [Epoch 83][30/40] loss=0.975934 avg=0.877499 VRAM=38.9GiB | 16.8% done | ETA(epoch): 230s + [Epoch 83][40/40] loss=0.949035 avg=0.870033 VRAM=38.8GiB | 16.8% done | ETA(epoch): 0s + Train loss: 0.870033 (921.0s) ETA: 6965min + Val loss: 0.875330 [t_0.0-0.2=1.0645 t_0.2-0.4=1.0277 t_0.4-0.6=0.8378 t_0.6-0.8=0.7307 t_0.8-1.0=0.6876] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0083 + Deleted old checkpoint: checkpoint_epoch_0080 +[MEM @ epoch 83 end] RAM: 15.6/188.4 GiB (8.3%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 84/499 (17% done) --- + [Epoch 84][10/40] loss=0.877433 avg=0.861950 VRAM=38.9GiB | 16.9% done | ETA(epoch): 691s + [Epoch 84][20/40] loss=0.834065 avg=0.867731 VRAM=38.8GiB | 16.9% done | ETA(epoch): 461s + [Epoch 84][30/40] loss=0.881413 avg=0.871805 VRAM=38.9GiB | 17.0% done | ETA(epoch): 230s + [Epoch 84][40/40] loss=0.873489 avg=0.877266 VRAM=38.8GiB | 17.0% done | ETA(epoch): 0s + Train loss: 0.877266 (921.0s) ETA: 6947min + Val loss: 0.880792 [t_0.0-0.2=1.0920 t_0.2-0.4=0.9951 t_0.4-0.6=0.8258 t_0.6-0.8=0.7386 t_0.8-1.0=0.6763] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0084 + Deleted old checkpoint: checkpoint_epoch_0081 +[MEM @ epoch 84 end] RAM: 16.2/188.4 GiB (8.6%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 85/499 (17% done) --- + [Epoch 85][10/40] loss=0.897206 avg=0.813634 VRAM=38.9GiB | 17.1% done | ETA(epoch): 690s + [Epoch 85][20/40] loss=0.821457 avg=0.825348 VRAM=38.8GiB | 17.1% done | ETA(epoch): 461s + [Epoch 85][30/40] loss=0.818686 avg=0.828175 VRAM=38.9GiB | 17.2% done | ETA(epoch): 230s + [Epoch 85][40/40] loss=0.813937 avg=0.840197 VRAM=38.8GiB | 17.2% done | ETA(epoch): 0s + Train loss: 0.840197 (921.1s) ETA: 6929min + Val loss: 0.849144 [t_0.0-0.2=1.0670 t_0.2-0.4=1.0062 t_0.4-0.6=0.8543 t_0.6-0.8=0.7220 t_0.8-1.0=0.6662] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0085 + Deleted old checkpoint: checkpoint_epoch_0082 +[MEM @ epoch 85 end] RAM: 16.2/188.4 GiB (8.6%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 86/499 (17% done) --- + [Epoch 86][10/40] loss=0.739026 avg=0.848772 VRAM=38.9GiB | 17.2% done | ETA(epoch): 691s + [Epoch 86][20/40] loss=0.848778 avg=0.850984 VRAM=38.8GiB | 17.3% done | ETA(epoch): 461s + [Epoch 86][30/40] loss=0.947612 avg=0.849888 VRAM=38.9GiB | 17.3% done | ETA(epoch): 230s + [Epoch 86][40/40] loss=0.838615 avg=0.845703 VRAM=38.8GiB | 17.4% done | ETA(epoch): 0s + Train loss: 0.845703 (921.6s) ETA: 6911min + Val loss: 0.888687 [t_0.0-0.2=1.0821 t_0.2-0.4=0.9901 t_0.4-0.6=0.8692 t_0.6-0.8=0.7168 t_0.8-1.0=0.6720] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0086 + Deleted old checkpoint: checkpoint_epoch_0083 +[MEM @ epoch 86 end] RAM: 16.2/188.4 GiB (8.6%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 87/499 (17% done) --- + [Epoch 87][10/40] loss=0.926583 avg=0.882239 VRAM=38.9GiB | 17.4% done | ETA(epoch): 691s + [Epoch 87][20/40] loss=0.844799 avg=0.876121 VRAM=38.8GiB | 17.5% done | ETA(epoch): 461s + [Epoch 87][30/40] loss=0.970978 avg=0.873164 VRAM=38.9GiB | 17.5% done | ETA(epoch): 230s + [Epoch 87][40/40] loss=0.895499 avg=0.873331 VRAM=38.8GiB | 17.6% done | ETA(epoch): 0s + Train loss: 0.873331 (921.7s) ETA: 6893min + Val loss: 0.879613 [t_0.0-0.2=1.0850 t_0.2-0.4=0.9855 t_0.4-0.6=0.8452 t_0.6-0.8=0.7473 t_0.8-1.0=0.6623] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0087 + Deleted old checkpoint: checkpoint_epoch_0084 +[MEM @ epoch 87 end] RAM: 16.2/188.4 GiB (8.6%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 88/499 (18% done) --- + [Epoch 88][10/40] loss=0.770442 avg=0.872201 VRAM=38.9GiB | 17.6% done | ETA(epoch): 691s + [Epoch 88][20/40] loss=0.882597 avg=0.875423 VRAM=38.8GiB | 17.7% done | ETA(epoch): 460s + [Epoch 88][30/40] loss=0.806839 avg=0.864299 VRAM=38.9GiB | 17.8% done | ETA(epoch): 230s + [Epoch 88][40/40] loss=0.814939 avg=0.862696 VRAM=38.8GiB | 17.8% done | ETA(epoch): 0s + Train loss: 0.862696 (920.0s) ETA: 6875min + Val loss: 0.862893 [t_0.0-0.2=1.0789 t_0.2-0.4=0.9988 t_0.4-0.6=0.8660 t_0.6-0.8=0.7385 t_0.8-1.0=0.6708] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0088 + Deleted old checkpoint: checkpoint_epoch_0085 +[MEM @ epoch 88 end] RAM: 16.3/188.4 GiB (8.6%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 89/499 (18% done) --- + [Epoch 89][10/40] loss=0.933978 avg=0.866480 VRAM=38.9GiB | 17.8% done | ETA(epoch): 690s + [Epoch 89][20/40] loss=0.853483 avg=0.860853 VRAM=38.8GiB | 17.9% done | ETA(epoch): 460s + [Epoch 89][30/40] loss=0.805008 avg=0.861385 VRAM=38.9GiB | 17.9% done | ETA(epoch): 230s + [Epoch 89][40/40] loss=0.874681 avg=0.867300 VRAM=38.8GiB | 18.0% done | ETA(epoch): 0s + Train loss: 0.867300 (920.7s) ETA: 6857min + Val loss: 0.850357 [t_0.0-0.2=1.0662 t_0.2-0.4=1.0000 t_0.4-0.6=0.8639 t_0.6-0.8=0.7451 t_0.8-1.0=0.6775] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0089 + Deleted old checkpoint: checkpoint_epoch_0086 +[MEM @ epoch 89 end] RAM: 16.2/188.4 GiB (8.6%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 90/499 (18% done) --- + [Epoch 90][10/40] loss=0.861763 avg=0.864038 VRAM=38.9GiB | 18.1% done | ETA(epoch): 691s + [Epoch 90][20/40] loss=0.980793 avg=0.872808 VRAM=38.8GiB | 18.1% done | ETA(epoch): 461s + [Epoch 90][30/40] loss=0.793121 avg=0.858280 VRAM=38.9GiB | 18.1% done | ETA(epoch): 230s + [Epoch 90][40/40] loss=0.796975 avg=0.861049 VRAM=38.8GiB | 18.2% done | ETA(epoch): 0s + Train loss: 0.861049 (921.6s) ETA: 6839min + Val loss: 0.860423 [t_0.0-0.2=1.0684 t_0.2-0.4=0.9938 t_0.4-0.6=0.8318 t_0.6-0.8=0.7218 t_0.8-1.0=0.6629] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0090 + Deleted old checkpoint: checkpoint_epoch_0087 +[MEM @ epoch 90 end] RAM: 16.3/188.4 GiB (8.6%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 91/499 (18% done) --- + [Epoch 91][10/40] loss=0.838749 avg=0.833380 VRAM=38.9GiB | 18.2% done | ETA(epoch): 691s + [Epoch 91][20/40] loss=0.858340 avg=0.849734 VRAM=38.8GiB | 18.3% done | ETA(epoch): 460s + [Epoch 91][30/40] loss=0.723801 avg=0.845156 VRAM=38.9GiB | 18.4% done | ETA(epoch): 230s + [Epoch 91][40/40] loss=1.015142 avg=0.850297 VRAM=38.8GiB | 18.4% done | ETA(epoch): 0s + Train loss: 0.850297 (920.8s) ETA: 6821min + Val loss: 0.892100 [t_0.0-0.2=1.0741 t_0.2-0.4=0.9827 t_0.4-0.6=0.8582 t_0.6-0.8=0.7348 t_0.8-1.0=0.6619] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0091 + Deleted old checkpoint: checkpoint_epoch_0088 +[MEM @ epoch 91 end] RAM: 16.3/188.4 GiB (8.6%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 92/499 (18% done) --- + [Epoch 92][10/40] loss=0.924043 avg=0.897979 VRAM=38.9GiB | 18.4% done | ETA(epoch): 691s + [Epoch 92][20/40] loss=0.938318 avg=0.894619 VRAM=38.8GiB | 18.5% done | ETA(epoch): 460s + [Epoch 92][30/40] loss=0.933050 avg=0.892801 VRAM=38.9GiB | 18.6% done | ETA(epoch): 230s + [Epoch 92][40/40] loss=1.006365 avg=0.880486 VRAM=38.8GiB | 18.6% done | ETA(epoch): 0s + Train loss: 0.880486 (921.0s) ETA: 6803min + Val loss: 0.864303 [t_0.0-0.2=1.0701 t_0.2-0.4=1.0109 t_0.4-0.6=0.8191 t_0.6-0.8=0.7480 t_0.8-1.0=0.6646] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0092 + Deleted old checkpoint: checkpoint_epoch_0089 +[MEM @ epoch 92 end] RAM: 16.2/188.4 GiB (8.6%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 93/499 (19% done) --- + [Epoch 93][10/40] loss=0.835578 avg=0.831834 VRAM=38.9GiB | 18.6% done | ETA(epoch): 690s + [Epoch 93][20/40] loss=0.830789 avg=0.856446 VRAM=38.8GiB | 18.7% done | ETA(epoch): 460s + [Epoch 93][30/40] loss=0.926485 avg=0.855738 VRAM=38.9GiB | 18.8% done | ETA(epoch): 230s + [Epoch 93][40/40] loss=0.956350 avg=0.857992 VRAM=38.8GiB | 18.8% done | ETA(epoch): 0s + Train loss: 0.857992 (920.7s) ETA: 6785min + Val loss: 0.850798 [t_0.0-0.2=1.0646 t_0.2-0.4=1.0161 t_0.4-0.6=0.8453 t_0.6-0.8=0.7383 t_0.8-1.0=0.6595] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0093 + Deleted old checkpoint: checkpoint_epoch_0090 +[MEM @ epoch 93 end] RAM: 16.3/188.4 GiB (8.6%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 94/499 (19% done) --- + [Epoch 94][10/40] loss=0.897231 avg=0.866365 VRAM=38.9GiB | 18.9% done | ETA(epoch): 690s + [Epoch 94][20/40] loss=0.920908 avg=0.866408 VRAM=38.8GiB | 18.9% done | ETA(epoch): 460s + [Epoch 94][30/40] loss=0.874897 avg=0.862036 VRAM=38.9GiB | 18.9% done | ETA(epoch): 230s + [Epoch 94][40/40] loss=0.905408 avg=0.866323 VRAM=38.8GiB | 19.0% done | ETA(epoch): 0s + Train loss: 0.866323 (921.0s) ETA: 6767min + Val loss: 0.844855 [t_0.0-0.2=1.0812 t_0.2-0.4=1.0164 t_0.4-0.6=0.8436 t_0.6-0.8=0.7020 t_0.8-1.0=0.6647] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0094 + Deleted old checkpoint: checkpoint_epoch_0091 +[MEM @ epoch 94 end] RAM: 16.2/188.4 GiB (8.6%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 95/499 (19% done) --- + [Epoch 95][10/40] loss=0.790580 avg=0.873985 VRAM=38.9GiB | 19.1% done | ETA(epoch): 691s + [Epoch 95][20/40] loss=0.850082 avg=0.861448 VRAM=38.8GiB | 19.1% done | ETA(epoch): 460s + [Epoch 95][30/40] loss=0.815722 avg=0.854677 VRAM=38.9GiB | 19.1% done | ETA(epoch): 230s + [Epoch 95][40/40] loss=0.877943 avg=0.846558 VRAM=38.8GiB | 19.2% done | ETA(epoch): 0s + Train loss: 0.846558 (921.0s) ETA: 6749min + Val loss: 0.869236 [t_0.0-0.2=1.0716 t_0.2-0.4=0.9894 t_0.4-0.6=0.8336 t_0.6-0.8=0.7191 t_0.8-1.0=0.6706] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0095 + Deleted old checkpoint: checkpoint_epoch_0092 +[MEM @ epoch 95 end] RAM: 16.2/188.4 GiB (8.6%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 96/499 (19% done) --- + [Epoch 96][10/40] loss=0.839661 avg=0.853407 VRAM=38.9GiB | 19.2% done | ETA(epoch): 690s + [Epoch 96][20/40] loss=0.884362 avg=0.869501 VRAM=38.8GiB | 19.3% done | ETA(epoch): 460s + [Epoch 96][30/40] loss=0.798925 avg=0.866678 VRAM=38.9GiB | 19.4% done | ETA(epoch): 230s + [Epoch 96][40/40] loss=0.820816 avg=0.858409 VRAM=38.8GiB | 19.4% done | ETA(epoch): 0s + Train loss: 0.858409 (920.1s) ETA: 6731min + Val loss: 0.839939 [t_0.0-0.2=1.0782 t_0.2-0.4=0.9963 t_0.4-0.6=0.8413 t_0.6-0.8=0.7087 t_0.8-1.0=0.6712] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0096 + Deleted old checkpoint: checkpoint_epoch_0093 +[MEM @ epoch 96 end] RAM: 16.3/188.4 GiB (8.7%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 97/499 (19% done) --- + [Epoch 97][10/40] loss=0.847378 avg=0.832939 VRAM=38.9GiB | 19.4% done | ETA(epoch): 689s + [Epoch 97][20/40] loss=0.738044 avg=0.850284 VRAM=38.8GiB | 19.5% done | ETA(epoch): 460s + [Epoch 97][30/40] loss=0.789444 avg=0.850050 VRAM=38.9GiB | 19.6% done | ETA(epoch): 230s + [Epoch 97][40/40] loss=0.865499 avg=0.852425 VRAM=38.8GiB | 19.6% done | ETA(epoch): 0s + Train loss: 0.852425 (919.8s) ETA: 6714min + Val loss: 0.888451 [t_0.0-0.2=1.0792 t_0.2-0.4=1.0213 t_0.4-0.6=0.8308 t_0.6-0.8=0.7116 t_0.8-1.0=0.6834] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0097 + Deleted old checkpoint: checkpoint_epoch_0094 +[MEM @ epoch 97 end] RAM: 16.4/188.4 GiB (8.7%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 98/499 (20% done) --- + [Epoch 98][10/40] loss=0.911340 avg=0.867172 VRAM=38.9GiB | 19.7% done | ETA(epoch): 690s + [Epoch 98][20/40] loss=0.867495 avg=0.862394 VRAM=38.8GiB | 19.7% done | ETA(epoch): 460s + [Epoch 98][30/40] loss=0.920593 avg=0.854444 VRAM=38.9GiB | 19.8% done | ETA(epoch): 230s + [Epoch 98][40/40] loss=0.831199 avg=0.861505 VRAM=38.8GiB | 19.8% done | ETA(epoch): 0s + Train loss: 0.861505 (920.2s) ETA: 6696min + Val loss: 0.828597 [t_0.0-0.2=1.0711 t_0.2-0.4=0.9709 t_0.4-0.6=0.8533 t_0.6-0.8=0.7315 t_0.8-1.0=0.6485] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0098 (BEST) + Deleted old checkpoint: checkpoint_epoch_0076 + Deleted old checkpoint: checkpoint_epoch_0095 +[MEM @ epoch 98 end] RAM: 16.3/188.4 GiB (8.6%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 99/499 (20% done) --- + [Epoch 99][10/40] loss=0.896135 avg=0.868731 VRAM=38.9GiB | 19.9% done | ETA(epoch): 690s + [Epoch 99][20/40] loss=0.814269 avg=0.871922 VRAM=38.8GiB | 19.9% done | ETA(epoch): 460s + [Epoch 99][30/40] loss=0.818913 avg=0.874898 VRAM=38.9GiB | 20.0% done | ETA(epoch): 230s + [Epoch 99][40/40] loss=0.753119 avg=0.867570 VRAM=38.8GiB | 20.0% done | ETA(epoch): 0s + Train loss: 0.867570 (920.4s) ETA: 6678min + Val loss: 0.846439 [t_0.0-0.2=1.0729 t_0.2-0.4=0.9856 t_0.4-0.6=0.8393 t_0.6-0.8=0.7123 t_0.8-1.0=0.6661] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0099 + Deleted old checkpoint: checkpoint_epoch_0096 +[MEM @ epoch 99 end] RAM: 16.3/188.4 GiB (8.6%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 100/499 (20% done) --- + [MilestoneVis] train_0 step 4000 ✓ + [MilestoneGrid] train_0 → output/lora_rank_128_mlp_H200/milestone_vis/milestone_grid_train_0.png + [MilestoneVis] train_1 step 4000 ✓ + [MilestoneGrid] train_1 → output/lora_rank_128_mlp_H200/milestone_vis/milestone_grid_train_1.png + [MilestoneVis] val_0 step 4000 ✓ + [MilestoneGrid] val_0 → output/lora_rank_128_mlp_H200/milestone_vis/milestone_grid_val_0.png + [MilestoneVis] val_1 step 4000 ✓ + [MilestoneGrid] val_1 → output/lora_rank_128_mlp_H200/milestone_vis/milestone_grid_val_1.png + [MilestoneVis] test_0 step 4000 ✓ + [MilestoneGrid] test_0 → output/lora_rank_128_mlp_H200/milestone_vis/milestone_grid_test_0.png + [MilestoneVis] test_1 step 4000 ✓ + [MilestoneGrid] test_1 → output/lora_rank_128_mlp_H200/milestone_vis/milestone_grid_test_1.png + [MilestoneVis] test_2 step 4000 ✓ + [MilestoneGrid] test_2 → output/lora_rank_128_mlp_H200/milestone_vis/milestone_grid_test_2.png + [MilestoneVis] test_3 step 4000 ✓ + [MilestoneGrid] test_3 → output/lora_rank_128_mlp_H200/milestone_vis/milestone_grid_test_3.png + [MilestoneVis] test_4 step 4000 ✓ + [MilestoneGrid] test_4 → output/lora_rank_128_mlp_H200/milestone_vis/milestone_grid_test_4.png + [MilestoneVis] test_5 step 4000 ✓ + [MilestoneGrid] test_5 → output/lora_rank_128_mlp_H200/milestone_vis/milestone_grid_test_5.png + [Epoch 100][10/40] loss=0.864842 avg=0.887641 VRAM=38.9GiB | 20.1% done | ETA(epoch): 2931s + [Epoch 100][20/40] loss=0.829476 avg=0.872413 VRAM=38.8GiB | 20.1% done | ETA(epoch): 1207s + [Epoch 100][30/40] loss=0.880342 avg=0.863603 VRAM=38.9GiB | 20.2% done | ETA(epoch): 479s + [Epoch 100][40/40] loss=0.785638 avg=0.862257 VRAM=38.8GiB | 20.2% done | ETA(epoch): 0s + Train loss: 0.862257 (1667.4s) ETA: 6710min + Val loss: 0.867839 [t_0.0-0.2=1.0811 t_0.2-0.4=0.9923 t_0.4-0.6=0.8446 t_0.6-0.8=0.6922 t_0.8-1.0=0.6658] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0100 + Deleted old checkpoint: checkpoint_epoch_0097 +[MEM @ epoch 100 end] RAM: 16.9/188.4 GiB (9.0%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 101/499 (20% done) --- + [Epoch 101][10/40] loss=0.872525 avg=0.834250 VRAM=38.9GiB | 20.2% done | ETA(epoch): 690s + [Epoch 101][20/40] loss=0.815138 avg=0.847522 VRAM=38.8GiB | 20.3% done | ETA(epoch): 460s + [Epoch 101][30/40] loss=0.920624 avg=0.848170 VRAM=38.9GiB | 20.3% done | ETA(epoch): 230s + [Epoch 101][40/40] loss=0.812253 avg=0.860978 VRAM=38.8GiB | 20.4% done | ETA(epoch): 0s + Train loss: 0.860978 (920.7s) ETA: 6691min + Val loss: 0.858458 [t_0.0-0.2=1.0775 t_0.2-0.4=0.9706 t_0.4-0.6=0.8756 t_0.6-0.8=0.7150 t_0.8-1.0=0.6392] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0101 +[MEM @ epoch 101 end] RAM: 16.9/188.4 GiB (9.0%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 102/499 (20% done) --- + [Epoch 102][10/40] loss=0.736305 avg=0.829274 VRAM=38.9GiB | 20.4% done | ETA(epoch): 691s + [Epoch 102][20/40] loss=0.911816 avg=0.844526 VRAM=38.8GiB | 20.5% done | ETA(epoch): 461s + [Epoch 102][30/40] loss=0.748860 avg=0.839345 VRAM=38.9GiB | 20.5% done | ETA(epoch): 230s + [Epoch 102][40/40] loss=0.844191 avg=0.837623 VRAM=38.8GiB | 20.6% done | ETA(epoch): 0s + Train loss: 0.837623 (921.5s) ETA: 6673min + Val loss: 0.834782 [t_0.0-0.2=1.0643 t_0.2-0.4=0.9717 t_0.4-0.6=0.8497 t_0.6-0.8=0.7376 t_0.8-1.0=0.6724] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0102 + Deleted old checkpoint: checkpoint_epoch_0099 +[MEM @ epoch 102 end] RAM: 17.0/188.4 GiB (9.0%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 103/499 (21% done) --- + [Epoch 103][10/40] loss=0.865799 avg=0.803045 VRAM=38.9GiB | 20.6% done | ETA(epoch): 689s + [Epoch 103][20/40] loss=0.755609 avg=0.829569 VRAM=38.8GiB | 20.7% done | ETA(epoch): 460s + [Epoch 103][30/40] loss=0.832852 avg=0.832636 VRAM=38.9GiB | 20.8% done | ETA(epoch): 230s + [Epoch 103][40/40] loss=0.927700 avg=0.842785 VRAM=38.8GiB | 20.8% done | ETA(epoch): 0s + Train loss: 0.842785 (919.6s) ETA: 6655min + Val loss: 0.862036 [t_0.0-0.2=1.0794 t_0.2-0.4=1.0048 t_0.4-0.6=0.8158 t_0.6-0.8=0.7125 t_0.8-1.0=0.6647] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0103 + Deleted old checkpoint: checkpoint_epoch_0100 +[MEM @ epoch 103 end] RAM: 16.9/188.4 GiB (9.0%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 104/499 (21% done) --- + [Epoch 104][10/40] loss=0.867812 avg=0.877313 VRAM=38.9GiB | 20.8% done | ETA(epoch): 690s + [Epoch 104][20/40] loss=0.801928 avg=0.891679 VRAM=38.8GiB | 20.9% done | ETA(epoch): 460s + [Epoch 104][30/40] loss=0.903984 avg=0.883098 VRAM=38.9GiB | 20.9% done | ETA(epoch): 230s + [Epoch 104][40/40] loss=0.802039 avg=0.872879 VRAM=38.8GiB | 21.0% done | ETA(epoch): 0s + Train loss: 0.872879 (920.1s) ETA: 6637min + Val loss: 0.852041 [t_0.0-0.2=1.0711 t_0.2-0.4=0.9593 t_0.4-0.6=0.8663 t_0.6-0.8=0.7086 t_0.8-1.0=0.6549] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0104 + Deleted old checkpoint: checkpoint_epoch_0101 +[MEM @ epoch 104 end] RAM: 16.9/188.4 GiB (9.0%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 105/499 (21% done) --- + [Epoch 105][10/40] loss=0.872604 avg=0.873585 VRAM=38.9GiB | 21.1% done | ETA(epoch): 691s + [Epoch 105][20/40] loss=0.863565 avg=0.874786 VRAM=38.8GiB | 21.1% done | ETA(epoch): 460s + [Epoch 105][30/40] loss=0.920411 avg=0.871403 VRAM=38.9GiB | 21.1% done | ETA(epoch): 230s + [Epoch 105][40/40] loss=0.923054 avg=0.867385 VRAM=38.8GiB | 21.2% done | ETA(epoch): 0s + Train loss: 0.867385 (920.6s) ETA: 6618min + Val loss: 0.851589 [t_0.0-0.2=1.0878 t_0.2-0.4=1.0090 t_0.4-0.6=0.8321 t_0.6-0.8=0.7049 t_0.8-1.0=0.6641] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0105 + Deleted old checkpoint: checkpoint_epoch_0102 +[MEM @ epoch 105 end] RAM: 16.8/188.4 GiB (8.9%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 106/499 (21% done) --- + [Epoch 106][10/40] loss=0.872432 avg=0.892363 VRAM=38.9GiB | 21.2% done | ETA(epoch): 691s + [Epoch 106][20/40] loss=0.801866 avg=0.882887 VRAM=38.8GiB | 21.3% done | ETA(epoch): 461s + [Epoch 106][30/40] loss=0.828291 avg=0.881467 VRAM=38.9GiB | 21.3% done | ETA(epoch): 230s + [Epoch 106][40/40] loss=0.891977 avg=0.884873 VRAM=38.8GiB | 21.4% done | ETA(epoch): 0s + Train loss: 0.884873 (921.7s) ETA: 6600min + Val loss: 0.854579 [t_0.0-0.2=1.1033 t_0.2-0.4=0.9952 t_0.4-0.6=0.8585 t_0.6-0.8=0.7284 t_0.8-1.0=0.6484] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0106 + Deleted old checkpoint: checkpoint_epoch_0103 +[MEM @ epoch 106 end] RAM: 16.9/188.4 GiB (9.0%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 107/499 (21% done) --- + [Epoch 107][10/40] loss=0.794021 avg=0.831186 VRAM=38.9GiB | 21.4% done | ETA(epoch): 691s + [Epoch 107][20/40] loss=0.932900 avg=0.847314 VRAM=38.8GiB | 21.5% done | ETA(epoch): 461s + [Epoch 107][30/40] loss=0.907162 avg=0.857428 VRAM=38.9GiB | 21.6% done | ETA(epoch): 230s + [Epoch 107][40/40] loss=0.909137 avg=0.851179 VRAM=38.8GiB | 21.6% done | ETA(epoch): 0s + Train loss: 0.851179 (921.1s) ETA: 6582min + Val loss: 0.876219 [t_0.0-0.2=1.0822 t_0.2-0.4=1.0048 t_0.4-0.6=0.8631 t_0.6-0.8=0.6951 t_0.8-1.0=0.6584] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0107 + Deleted old checkpoint: checkpoint_epoch_0104 +[MEM @ epoch 107 end] RAM: 16.9/188.4 GiB (8.9%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 108/499 (22% done) --- + [Epoch 108][10/40] loss=0.816735 avg=0.841633 VRAM=38.9GiB | 21.6% done | ETA(epoch): 691s + [Epoch 108][20/40] loss=0.817799 avg=0.834745 VRAM=38.8GiB | 21.7% done | ETA(epoch): 460s + [Epoch 108][30/40] loss=0.847495 avg=0.844082 VRAM=38.9GiB | 21.8% done | ETA(epoch): 230s + [Epoch 108][40/40] loss=0.805010 avg=0.842072 VRAM=38.8GiB | 21.8% done | ETA(epoch): 0s + Train loss: 0.842072 (921.2s) ETA: 6564min + Val loss: 0.863949 [t_0.0-0.2=1.0676 t_0.2-0.4=1.0309 t_0.4-0.6=0.8173 t_0.6-0.8=0.7229 t_0.8-1.0=0.6663] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0108 + Deleted old checkpoint: checkpoint_epoch_0105 +[MEM @ epoch 108 end] RAM: 16.9/188.4 GiB (9.0%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 109/499 (22% done) --- + [Epoch 109][10/40] loss=0.856237 avg=0.844240 VRAM=38.9GiB | 21.9% done | ETA(epoch): 691s + [Epoch 109][20/40] loss=0.711840 avg=0.843721 VRAM=38.8GiB | 21.9% done | ETA(epoch): 461s + [Epoch 109][30/40] loss=0.965462 avg=0.849479 VRAM=38.9GiB | 21.9% done | ETA(epoch): 230s + [Epoch 109][40/40] loss=0.870386 avg=0.855437 VRAM=38.8GiB | 22.0% done | ETA(epoch): 0s + Train loss: 0.855437 (920.9s) ETA: 6546min + Val loss: 0.871418 [t_0.0-0.2=1.0548 t_0.2-0.4=1.0092 t_0.4-0.6=0.8369 t_0.6-0.8=0.7179 t_0.8-1.0=0.6619] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0109 + Deleted old checkpoint: checkpoint_epoch_0106 +[MEM @ epoch 109 end] RAM: 16.9/188.4 GiB (9.0%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 110/499 (22% done) --- + [Epoch 110][10/40] loss=0.842796 avg=0.849246 VRAM=38.9GiB | 22.1% done | ETA(epoch): 691s + [Epoch 110][20/40] loss=0.794438 avg=0.860909 VRAM=38.8GiB | 22.1% done | ETA(epoch): 460s + [Epoch 110][30/40] loss=0.843153 avg=0.863396 VRAM=38.9GiB | 22.1% done | ETA(epoch): 230s + [Epoch 110][40/40] loss=0.858501 avg=0.865458 VRAM=38.8GiB | 22.2% done | ETA(epoch): 0s + Train loss: 0.865458 (921.1s) ETA: 6528min + Val loss: 0.856781 [t_0.0-0.2=1.0725 t_0.2-0.4=1.0264 t_0.4-0.6=0.8659 t_0.6-0.8=0.7365 t_0.8-1.0=0.6764] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0110 + Deleted old checkpoint: checkpoint_epoch_0107 +[MEM @ epoch 110 end] RAM: 16.9/188.4 GiB (9.0%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 111/499 (22% done) --- + [Epoch 111][10/40] loss=0.769947 avg=0.877432 VRAM=38.9GiB | 22.2% done | ETA(epoch): 690s + [Epoch 111][20/40] loss=0.885211 avg=0.859397 VRAM=38.8GiB | 22.3% done | ETA(epoch): 460s + [Epoch 111][30/40] loss=0.800124 avg=0.857645 VRAM=38.9GiB | 22.4% done | ETA(epoch): 230s + [Epoch 111][40/40] loss=0.910398 avg=0.862093 VRAM=38.8GiB | 22.4% done | ETA(epoch): 0s + Train loss: 0.862093 (920.7s) ETA: 6511min + Val loss: 0.877696 [t_0.0-0.2=1.0791 t_0.2-0.4=0.9861 t_0.4-0.6=0.8347 t_0.6-0.8=0.6939 t_0.8-1.0=0.6687] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0111 + Deleted old checkpoint: checkpoint_epoch_0108 +[MEM @ epoch 111 end] RAM: 16.9/188.4 GiB (9.0%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 112/499 (22% done) --- + [Epoch 112][10/40] loss=0.940962 avg=0.863525 VRAM=38.9GiB | 22.4% done | ETA(epoch): 691s + [Epoch 112][20/40] loss=0.744975 avg=0.854042 VRAM=38.8GiB | 22.5% done | ETA(epoch): 460s + [Epoch 112][30/40] loss=0.958508 avg=0.858957 VRAM=38.9GiB | 22.6% done | ETA(epoch): 230s + [Epoch 112][40/40] loss=0.916952 avg=0.854061 VRAM=38.8GiB | 22.6% done | ETA(epoch): 0s + Train loss: 0.854061 (920.8s) ETA: 6493min + Val loss: 0.844576 [t_0.0-0.2=1.0592 t_0.2-0.4=0.9677 t_0.4-0.6=0.8499 t_0.6-0.8=0.7055 t_0.8-1.0=0.6480] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0112 + Deleted old checkpoint: checkpoint_epoch_0109 +[MEM @ epoch 112 end] RAM: 16.8/188.4 GiB (8.9%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 113/499 (23% done) --- + [Epoch 113][10/40] loss=0.941333 avg=0.869726 VRAM=38.9GiB | 22.7% done | ETA(epoch): 691s + [Epoch 113][20/40] loss=0.934096 avg=0.860514 VRAM=38.8GiB | 22.7% done | ETA(epoch): 461s + [Epoch 113][30/40] loss=0.968325 avg=0.866090 VRAM=38.9GiB | 22.8% done | ETA(epoch): 230s + [Epoch 113][40/40] loss=0.812558 avg=0.866101 VRAM=38.8GiB | 22.8% done | ETA(epoch): 0s + Train loss: 0.866101 (921.0s) ETA: 6475min + Val loss: 0.851463 [t_0.0-0.2=1.0790 t_0.2-0.4=1.0192 t_0.4-0.6=0.8196 t_0.6-0.8=0.7175 t_0.8-1.0=0.6527] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0113 + Deleted old checkpoint: checkpoint_epoch_0110 +[MEM @ epoch 113 end] RAM: 16.9/188.4 GiB (9.0%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 114/499 (23% done) --- + [Epoch 114][10/40] loss=0.817446 avg=0.838759 VRAM=38.9GiB | 22.9% done | ETA(epoch): 691s + [Epoch 114][20/40] loss=0.825273 avg=0.861737 VRAM=38.8GiB | 22.9% done | ETA(epoch): 461s + [Epoch 114][30/40] loss=0.868479 avg=0.864563 VRAM=38.9GiB | 22.9% done | ETA(epoch): 230s + [Epoch 114][40/40] loss=0.829676 avg=0.858604 VRAM=38.8GiB | 23.0% done | ETA(epoch): 0s + Train loss: 0.858604 (921.5s) ETA: 6457min + Val loss: 0.859023 [t_0.0-0.2=1.0763 t_0.2-0.4=1.0173 t_0.4-0.6=0.7929 t_0.6-0.8=0.7098 t_0.8-1.0=0.6436] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0114 + Deleted old checkpoint: checkpoint_epoch_0111 +[MEM @ epoch 114 end] RAM: 16.9/188.4 GiB (9.0%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 115/499 (23% done) --- + [Epoch 115][10/40] loss=0.786318 avg=0.814930 VRAM=38.9GiB | 23.1% done | ETA(epoch): 690s + [Epoch 115][20/40] loss=0.869902 avg=0.837254 VRAM=38.8GiB | 23.1% done | ETA(epoch): 460s + [Epoch 115][30/40] loss=0.861249 avg=0.846595 VRAM=38.9GiB | 23.2% done | ETA(epoch): 230s + [Epoch 115][40/40] loss=0.870361 avg=0.847843 VRAM=38.8GiB | 23.2% done | ETA(epoch): 0s + Train loss: 0.847843 (920.4s) ETA: 6439min + Val loss: 0.873000 [t_0.0-0.2=1.0694 t_0.2-0.4=1.0351 t_0.4-0.6=0.8383 t_0.6-0.8=0.7126 t_0.8-1.0=0.6557] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0115 + Deleted old checkpoint: checkpoint_epoch_0112 +[MEM @ epoch 115 end] RAM: 16.9/188.4 GiB (9.0%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 116/499 (23% done) --- + [Epoch 116][10/40] loss=0.779105 avg=0.842175 VRAM=38.9GiB | 23.2% done | ETA(epoch): 691s + [Epoch 116][20/40] loss=0.793950 avg=0.856691 VRAM=38.8GiB | 23.3% done | ETA(epoch): 460s + [Epoch 116][30/40] loss=0.904724 avg=0.855544 VRAM=38.9GiB | 23.4% done | ETA(epoch): 230s + [Epoch 116][40/40] loss=0.911688 avg=0.860297 VRAM=38.8GiB | 23.4% done | ETA(epoch): 0s + Train loss: 0.860297 (921.1s) ETA: 6421min + Val loss: 0.856344 [t_0.0-0.2=1.0776 t_0.2-0.4=0.9913 t_0.4-0.6=0.8512 t_0.6-0.8=0.7058 t_0.8-1.0=0.6568] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0116 + Deleted old checkpoint: checkpoint_epoch_0113 +[MEM @ epoch 116 end] RAM: 16.9/188.4 GiB (9.0%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 117/499 (23% done) --- + [Epoch 117][10/40] loss=0.919393 avg=0.829120 VRAM=38.9GiB | 23.4% done | ETA(epoch): 691s + [Epoch 117][20/40] loss=0.810725 avg=0.838677 VRAM=38.8GiB | 23.5% done | ETA(epoch): 461s + [Epoch 117][30/40] loss=0.894903 avg=0.834047 VRAM=38.9GiB | 23.5% done | ETA(epoch): 230s + [Epoch 117][40/40] loss=0.788929 avg=0.842373 VRAM=38.8GiB | 23.6% done | ETA(epoch): 0s + Train loss: 0.842373 (920.8s) ETA: 6403min + Val loss: 0.861169 [t_0.0-0.2=1.0746 t_0.2-0.4=0.9864 t_0.4-0.6=0.8677 t_0.6-0.8=0.7235 t_0.8-1.0=0.6454] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0117 + Deleted old checkpoint: checkpoint_epoch_0114 +[MEM @ epoch 117 end] RAM: 16.9/188.4 GiB (9.0%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 118/499 (24% done) --- + [Epoch 118][10/40] loss=0.763810 avg=0.845901 VRAM=38.9GiB | 23.6% done | ETA(epoch): 690s + [Epoch 118][20/40] loss=0.769708 avg=0.854510 VRAM=38.8GiB | 23.7% done | ETA(epoch): 460s + [Epoch 118][30/40] loss=0.738623 avg=0.847384 VRAM=38.9GiB | 23.8% done | ETA(epoch): 230s + [Epoch 118][40/40] loss=0.782315 avg=0.849622 VRAM=38.8GiB | 23.8% done | ETA(epoch): 0s + Train loss: 0.849622 (920.8s) ETA: 6386min + Val loss: 0.856364 [t_0.0-0.2=1.0730 t_0.2-0.4=1.0002 t_0.4-0.6=0.8686 t_0.6-0.8=0.7082 t_0.8-1.0=0.6481] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0118 + Deleted old checkpoint: checkpoint_epoch_0115 +[MEM @ epoch 118 end] RAM: 16.9/188.4 GiB (9.0%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 119/499 (24% done) --- + [Epoch 119][10/40] loss=0.776773 avg=0.869270 VRAM=38.9GiB | 23.8% done | ETA(epoch): 689s + [Epoch 119][20/40] loss=0.998267 avg=0.877228 VRAM=38.8GiB | 23.9% done | ETA(epoch): 460s + [Epoch 119][30/40] loss=0.850970 avg=0.876612 VRAM=38.9GiB | 23.9% done | ETA(epoch): 230s + [Epoch 119][40/40] loss=0.850304 avg=0.874223 VRAM=38.8GiB | 24.0% done | ETA(epoch): 0s + Train loss: 0.874223 (920.4s) ETA: 6368min + Val loss: 0.873982 [t_0.0-0.2=1.0775 t_0.2-0.4=0.9688 t_0.4-0.6=0.8476 t_0.6-0.8=0.7369 t_0.8-1.0=0.6650] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0119 + Deleted old checkpoint: checkpoint_epoch_0116 +[MEM @ epoch 119 end] RAM: 16.9/188.4 GiB (9.0%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 120/499 (24% done) --- + [Epoch 120][10/40] loss=0.857444 avg=0.849059 VRAM=38.9GiB | 24.1% done | ETA(epoch): 690s + [Epoch 120][20/40] loss=0.811611 avg=0.856421 VRAM=38.8GiB | 24.1% done | ETA(epoch): 460s + [Epoch 120][30/40] loss=0.861052 avg=0.851845 VRAM=38.9GiB | 24.1% done | ETA(epoch): 230s + [Epoch 120][40/40] loss=0.847267 avg=0.855994 VRAM=38.8GiB | 24.2% done | ETA(epoch): 0s + Train loss: 0.855994 (920.8s) ETA: 6350min + Val loss: 0.864690 [t_0.0-0.2=1.0764 t_0.2-0.4=0.9646 t_0.4-0.6=0.8570 t_0.6-0.8=0.7284 t_0.8-1.0=0.6380] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0120 + Deleted old checkpoint: checkpoint_epoch_0117 +[MEM @ epoch 120 end] RAM: 16.9/188.4 GiB (9.0%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 121/499 (24% done) --- + [Epoch 121][10/40] loss=0.814924 avg=0.832564 VRAM=38.9GiB | 24.2% done | ETA(epoch): 691s + [Epoch 121][20/40] loss=0.767699 avg=0.840653 VRAM=38.8GiB | 24.3% done | ETA(epoch): 460s + [Epoch 121][30/40] loss=0.840942 avg=0.838673 VRAM=38.9GiB | 24.3% done | ETA(epoch): 230s + [Epoch 121][40/40] loss=0.870469 avg=0.839076 VRAM=38.8GiB | 24.4% done | ETA(epoch): 0s + Train loss: 0.839076 (920.6s) ETA: 6332min + Val loss: 0.892221 [t_0.0-0.2=1.0544 t_0.2-0.4=1.0185 t_0.4-0.6=0.9079 t_0.6-0.8=0.7272 t_0.8-1.0=0.6573] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0121 + Deleted old checkpoint: checkpoint_epoch_0118 +[MEM @ epoch 121 end] RAM: 16.9/188.4 GiB (9.0%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 122/499 (24% done) --- + [Epoch 122][10/40] loss=0.870154 avg=0.876233 VRAM=38.9GiB | 24.4% done | ETA(epoch): 690s + [Epoch 122][20/40] loss=0.819982 avg=0.851674 VRAM=38.8GiB | 24.5% done | ETA(epoch): 460s + [Epoch 122][30/40] loss=0.813168 avg=0.851495 VRAM=38.9GiB | 24.6% done | ETA(epoch): 230s + [Epoch 122][40/40] loss=0.900234 avg=0.864040 VRAM=38.8GiB | 24.6% done | ETA(epoch): 0s + Train loss: 0.864040 (920.6s) ETA: 6315min + Val loss: 0.845161 [t_0.0-0.2=1.0803 t_0.2-0.4=0.9807 t_0.4-0.6=0.8658 t_0.6-0.8=0.7174 t_0.8-1.0=0.6481] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0122 + Deleted old checkpoint: checkpoint_epoch_0119 +[MEM @ epoch 122 end] RAM: 16.9/188.4 GiB (9.0%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 123/499 (25% done) --- + [Epoch 123][10/40] loss=0.767993 avg=0.858146 VRAM=38.9GiB | 24.6% done | ETA(epoch): 690s + [Epoch 123][20/40] loss=0.820742 avg=0.858900 VRAM=38.8GiB | 24.7% done | ETA(epoch): 460s + [Epoch 123][30/40] loss=0.839595 avg=0.861984 VRAM=38.9GiB | 24.8% done | ETA(epoch): 230s + [Epoch 123][40/40] loss=0.816819 avg=0.861079 VRAM=38.8GiB | 24.8% done | ETA(epoch): 0s + Train loss: 0.861079 (920.3s) ETA: 6297min + Val loss: 0.853943 [t_0.0-0.2=1.0806 t_0.2-0.4=1.0150 t_0.4-0.6=0.8477 t_0.6-0.8=0.6905 t_0.8-1.0=0.6484] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0123 + Deleted old checkpoint: checkpoint_epoch_0120 +[MEM @ epoch 123 end] RAM: 16.9/188.4 GiB (9.0%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 124/499 (25% done) --- + [Epoch 124][10/40] loss=0.857732 avg=0.840294 VRAM=38.9GiB | 24.9% done | ETA(epoch): 690s + [Epoch 124][20/40] loss=0.801583 avg=0.838339 VRAM=38.8GiB | 24.9% done | ETA(epoch): 460s + [Epoch 124][30/40] loss=0.848155 avg=0.847266 VRAM=38.9GiB | 24.9% done | ETA(epoch): 230s + [Epoch 124][40/40] loss=0.782409 avg=0.848504 VRAM=38.8GiB | 25.0% done | ETA(epoch): 0s + Train loss: 0.848504 (920.1s) ETA: 6279min + Val loss: 0.871782 [t_0.0-0.2=1.0727 t_0.2-0.4=0.9886 t_0.4-0.6=0.8401 t_0.6-0.8=0.7461 t_0.8-1.0=0.6504] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0124 + Deleted old checkpoint: checkpoint_epoch_0121 +[MEM @ epoch 124 end] RAM: 16.9/188.4 GiB (9.0%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 125/499 (25% done) --- + [Epoch 125][10/40] loss=0.833675 avg=0.858347 VRAM=38.9GiB | 25.1% done | ETA(epoch): 690s + [Epoch 125][20/40] loss=0.927704 avg=0.851174 VRAM=38.8GiB | 25.1% done | ETA(epoch): 460s + [Epoch 125][30/40] loss=0.945497 avg=0.855476 VRAM=38.9GiB | 25.1% done | ETA(epoch): 230s + [Epoch 125][40/40] loss=0.819736 avg=0.854518 VRAM=38.8GiB | 25.2% done | ETA(epoch): 0s + Train loss: 0.854518 (920.3s) ETA: 6262min + Val loss: 0.878955 [t_0.0-0.2=1.0832 t_0.2-0.4=1.0359 t_0.4-0.6=0.8255 t_0.6-0.8=0.7213 t_0.8-1.0=0.6561] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0125 + Deleted old checkpoint: checkpoint_epoch_0122 +[MEM @ epoch 125 end] RAM: 16.9/188.4 GiB (9.0%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 126/499 (25% done) --- + [Epoch 126][10/40] loss=0.908499 avg=0.880821 VRAM=38.9GiB | 25.2% done | ETA(epoch): 690s + [Epoch 126][20/40] loss=0.897577 avg=0.868520 VRAM=38.8GiB | 25.3% done | ETA(epoch): 461s + [Epoch 126][30/40] loss=0.782521 avg=0.860062 VRAM=38.9GiB | 25.4% done | ETA(epoch): 230s + [Epoch 126][40/40] loss=0.823645 avg=0.860558 VRAM=38.8GiB | 25.4% done | ETA(epoch): 0s + Train loss: 0.860558 (921.2s) ETA: 6244min + Val loss: 0.846492 [t_0.0-0.2=1.0623 t_0.2-0.4=0.9783 t_0.4-0.6=0.8705 t_0.6-0.8=0.6795 t_0.8-1.0=0.6504] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0126 + Deleted old checkpoint: checkpoint_epoch_0123 +[MEM @ epoch 126 end] RAM: 16.9/188.4 GiB (9.0%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 127/499 (25% done) --- + [Epoch 127][10/40] loss=0.886946 avg=0.850097 VRAM=38.9GiB | 25.4% done | ETA(epoch): 691s + [Epoch 127][20/40] loss=0.864643 avg=0.847257 VRAM=38.8GiB | 25.5% done | ETA(epoch): 460s + [Epoch 127][30/40] loss=0.749958 avg=0.852913 VRAM=38.9GiB | 25.6% done | ETA(epoch): 230s + [Epoch 127][40/40] loss=0.868038 avg=0.856895 VRAM=38.8GiB | 25.6% done | ETA(epoch): 0s + Train loss: 0.856895 (921.1s) ETA: 6226min + Val loss: 0.823257 [t_0.0-0.2=1.0715 t_0.2-0.4=0.9771 t_0.4-0.6=0.8306 t_0.6-0.8=0.7254 t_0.8-1.0=0.6596] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0127 (BEST) + Deleted old checkpoint: checkpoint_epoch_0098 + Deleted old checkpoint: checkpoint_epoch_0124 +[MEM @ epoch 127 end] RAM: 16.9/188.4 GiB (9.0%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 128/499 (26% done) --- + [Epoch 128][10/40] loss=0.897016 avg=0.853283 VRAM=38.9GiB | 25.7% done | ETA(epoch): 690s + [Epoch 128][20/40] loss=0.848921 avg=0.852068 VRAM=38.8GiB | 25.7% done | ETA(epoch): 460s + [Epoch 128][30/40] loss=0.873986 avg=0.853186 VRAM=38.9GiB | 25.8% done | ETA(epoch): 230s + [Epoch 128][40/40] loss=0.871180 avg=0.849258 VRAM=38.8GiB | 25.8% done | ETA(epoch): 0s + Train loss: 0.849258 (920.7s) ETA: 6209min + Val loss: 0.827158 [t_0.0-0.2=1.0868 t_0.2-0.4=0.9876 t_0.4-0.6=0.8126 t_0.6-0.8=0.7371 t_0.8-1.0=0.6499] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0128 + Deleted old checkpoint: checkpoint_epoch_0125 +[MEM @ epoch 128 end] RAM: 16.9/188.4 GiB (9.0%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 129/499 (26% done) --- + [Epoch 129][10/40] loss=0.727204 avg=0.843951 VRAM=38.9GiB | 25.9% done | ETA(epoch): 690s + [Epoch 129][20/40] loss=0.793539 avg=0.846406 VRAM=38.8GiB | 25.9% done | ETA(epoch): 460s + [Epoch 129][30/40] loss=0.906772 avg=0.860579 VRAM=38.9GiB | 25.9% done | ETA(epoch): 230s + [Epoch 129][40/40] loss=0.766544 avg=0.856682 VRAM=38.8GiB | 26.0% done | ETA(epoch): 0s + Train loss: 0.856682 (920.5s) ETA: 6191min + Val loss: 0.840405 [t_0.0-0.2=1.0720 t_0.2-0.4=0.9663 t_0.4-0.6=0.8447 t_0.6-0.8=0.7015 t_0.8-1.0=0.6617] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0129 + Deleted old checkpoint: checkpoint_epoch_0126 +[MEM @ epoch 129 end] RAM: 16.9/188.4 GiB (9.0%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 130/499 (26% done) --- + [Epoch 130][10/40] loss=0.809585 avg=0.826959 VRAM=38.9GiB | 26.1% done | ETA(epoch): 690s + [Epoch 130][20/40] loss=0.736053 avg=0.832894 VRAM=38.8GiB | 26.1% done | ETA(epoch): 460s + [Epoch 130][30/40] loss=0.849482 avg=0.837952 VRAM=38.9GiB | 26.2% done | ETA(epoch): 230s + [Epoch 130][40/40] loss=0.792532 avg=0.841896 VRAM=38.8GiB | 26.2% done | ETA(epoch): 0s + Train loss: 0.841896 (920.3s) ETA: 6174min + Val loss: 0.860581 [t_0.0-0.2=1.0609 t_0.2-0.4=1.0141 t_0.4-0.6=0.8605 t_0.6-0.8=0.7332 t_0.8-1.0=0.6387] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0130 +[MEM @ epoch 130 end] RAM: 17.0/188.4 GiB (9.0%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 131/499 (26% done) --- + [Epoch 131][10/40] loss=0.866910 avg=0.852811 VRAM=38.9GiB | 26.2% done | ETA(epoch): 691s + [Epoch 131][20/40] loss=0.968526 avg=0.846217 VRAM=38.8GiB | 26.3% done | ETA(epoch): 461s + [Epoch 131][30/40] loss=0.842995 avg=0.845122 VRAM=38.9GiB | 26.4% done | ETA(epoch): 230s + [Epoch 131][40/40] loss=0.901265 avg=0.845424 VRAM=38.8GiB | 26.4% done | ETA(epoch): 0s + Train loss: 0.845424 (921.3s) ETA: 6156min + Val loss: 0.824482 [t_0.0-0.2=1.0700 t_0.2-0.4=1.0033 t_0.4-0.6=0.8562 t_0.6-0.8=0.7178 t_0.8-1.0=0.6562] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0131 + Deleted old checkpoint: checkpoint_epoch_0128 +[MEM @ epoch 131 end] RAM: 17.0/188.4 GiB (9.0%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 132/499 (26% done) --- + [Epoch 132][10/40] loss=0.799740 avg=0.875205 VRAM=38.9GiB | 26.5% done | ETA(epoch): 690s + [Epoch 132][20/40] loss=0.836254 avg=0.865344 VRAM=38.8GiB | 26.5% done | ETA(epoch): 460s + [Epoch 132][30/40] loss=0.821534 avg=0.858423 VRAM=38.9GiB | 26.6% done | ETA(epoch): 230s + [Epoch 132][40/40] loss=0.941469 avg=0.864203 VRAM=38.8GiB | 26.6% done | ETA(epoch): 0s + Train loss: 0.864203 (919.6s) ETA: 6139min + Val loss: 0.852258 [t_0.0-0.2=1.0842 t_0.2-0.4=0.9632 t_0.4-0.6=0.8552 t_0.6-0.8=0.7251 t_0.8-1.0=0.6454] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0132 + Deleted old checkpoint: checkpoint_epoch_0129 +[MEM @ epoch 132 end] RAM: 16.9/188.4 GiB (9.0%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 133/499 (27% done) --- + [Epoch 133][10/40] loss=0.778825 avg=0.839966 VRAM=38.9GiB | 26.7% done | ETA(epoch): 689s + [Epoch 133][20/40] loss=0.789471 avg=0.835609 VRAM=38.8GiB | 26.7% done | ETA(epoch): 460s + [Epoch 133][30/40] loss=0.796217 avg=0.845585 VRAM=38.9GiB | 26.8% done | ETA(epoch): 230s + [Epoch 133][40/40] loss=0.854905 avg=0.857364 VRAM=38.8GiB | 26.8% done | ETA(epoch): 0s + Train loss: 0.857364 (920.1s) ETA: 6121min + Val loss: 0.856596 [t_0.0-0.2=1.0682 t_0.2-0.4=1.0201 t_0.4-0.6=0.8788 t_0.6-0.8=0.7109 t_0.8-1.0=0.6302] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0133 + Deleted old checkpoint: checkpoint_epoch_0130 +[MEM @ epoch 133 end] RAM: 16.9/188.4 GiB (9.0%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 134/499 (27% done) --- + [Epoch 134][10/40] loss=0.730014 avg=0.848623 VRAM=38.9GiB | 26.9% done | ETA(epoch): 690s + [Epoch 134][20/40] loss=0.795281 avg=0.853024 VRAM=38.8GiB | 26.9% done | ETA(epoch): 460s + [Epoch 134][30/40] loss=0.916329 avg=0.860753 VRAM=38.9GiB | 27.0% done | ETA(epoch): 230s + [Epoch 134][40/40] loss=0.911901 avg=0.854892 VRAM=38.8GiB | 27.0% done | ETA(epoch): 0s + Train loss: 0.854892 (920.1s) ETA: 6103min + Val loss: 0.881364 [t_0.0-0.2=1.0723 t_0.2-0.4=0.9878 t_0.4-0.6=0.8315 t_0.6-0.8=0.7377 t_0.8-1.0=0.6457] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0134 + Deleted old checkpoint: checkpoint_epoch_0131 +[MEM @ epoch 134 end] RAM: 16.9/188.4 GiB (9.0%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 135/499 (27% done) --- + [Epoch 135][10/40] loss=0.921943 avg=0.858755 VRAM=38.9GiB | 27.1% done | ETA(epoch): 690s + [Epoch 135][20/40] loss=0.867594 avg=0.857807 VRAM=38.8GiB | 27.1% done | ETA(epoch): 460s + [Epoch 135][30/40] loss=0.754825 avg=0.849230 VRAM=38.9GiB | 27.2% done | ETA(epoch): 230s + [Epoch 135][40/40] loss=0.874835 avg=0.853663 VRAM=38.8GiB | 27.2% done | ETA(epoch): 0s + Train loss: 0.853663 (920.5s) ETA: 6086min + Val loss: 0.834721 [t_0.0-0.2=1.0760 t_0.2-0.4=0.9895 t_0.4-0.6=0.8060 t_0.6-0.8=0.7229 t_0.8-1.0=0.6442] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0135 + Deleted old checkpoint: checkpoint_epoch_0132 +[MEM @ epoch 135 end] RAM: 16.9/188.4 GiB (9.0%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 136/499 (27% done) --- + [Epoch 136][10/40] loss=0.805301 avg=0.842449 VRAM=38.9GiB | 27.3% done | ETA(epoch): 690s + [Epoch 136][20/40] loss=0.892399 avg=0.838938 VRAM=38.8GiB | 27.3% done | ETA(epoch): 460s + [Epoch 136][30/40] loss=0.858893 avg=0.845637 VRAM=38.9GiB | 27.4% done | ETA(epoch): 230s + [Epoch 136][40/40] loss=0.721754 avg=0.845211 VRAM=38.8GiB | 27.4% done | ETA(epoch): 0s + Train loss: 0.845211 (919.9s) ETA: 6069min + Val loss: 0.867380 [t_0.0-0.2=1.0912 t_0.2-0.4=1.0111 t_0.4-0.6=0.8257 t_0.6-0.8=0.7120 t_0.8-1.0=0.6298] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0136 + Deleted old checkpoint: checkpoint_epoch_0133 +[MEM @ epoch 136 end] RAM: 16.9/188.4 GiB (9.0%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 137/499 (27% done) --- + [Epoch 137][10/40] loss=0.844312 avg=0.843607 VRAM=38.9GiB | 27.5% done | ETA(epoch): 690s + [Epoch 137][20/40] loss=0.844660 avg=0.835885 VRAM=38.8GiB | 27.5% done | ETA(epoch): 460s + [Epoch 137][30/40] loss=0.775266 avg=0.838485 VRAM=38.9GiB | 27.6% done | ETA(epoch): 230s + [Epoch 137][40/40] loss=0.800663 avg=0.852745 VRAM=38.8GiB | 27.6% done | ETA(epoch): 0s + Train loss: 0.852745 (920.8s) ETA: 6051min + Val loss: 0.851848 [t_0.0-0.2=1.0725 t_0.2-0.4=1.0190 t_0.4-0.6=0.8466 t_0.6-0.8=0.7451 t_0.8-1.0=0.6388] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0137 + Deleted old checkpoint: checkpoint_epoch_0134 +[MEM @ epoch 137 end] RAM: 16.9/188.4 GiB (9.0%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 138/499 (28% done) --- + [Epoch 138][10/40] loss=0.767386 avg=0.877856 VRAM=38.9GiB | 27.7% done | ETA(epoch): 691s + [Epoch 138][20/40] loss=0.925812 avg=0.881408 VRAM=38.8GiB | 27.7% done | ETA(epoch): 461s + [Epoch 138][30/40] loss=0.878108 avg=0.873322 VRAM=38.9GiB | 27.8% done | ETA(epoch): 230s + [Epoch 138][40/40] loss=0.819547 avg=0.860171 VRAM=38.8GiB | 27.8% done | ETA(epoch): 0s + Train loss: 0.860171 (921.2s) ETA: 6034min + Val loss: 0.864147 [t_0.0-0.2=1.0832 t_0.2-0.4=0.9754 t_0.4-0.6=0.8442 t_0.6-0.8=0.7362 t_0.8-1.0=0.6548] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0138 + Deleted old checkpoint: checkpoint_epoch_0135 +[MEM @ epoch 138 end] RAM: 16.9/188.4 GiB (9.0%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 139/499 (28% done) --- + [Epoch 139][10/40] loss=0.920005 avg=0.849111 VRAM=38.9GiB | 27.9% done | ETA(epoch): 690s + [Epoch 139][20/40] loss=0.929382 avg=0.858248 VRAM=38.8GiB | 27.9% done | ETA(epoch): 460s + [Epoch 139][30/40] loss=0.847938 avg=0.861854 VRAM=38.9GiB | 28.0% done | ETA(epoch): 230s + [Epoch 139][40/40] loss=0.852841 avg=0.862032 VRAM=38.8GiB | 28.0% done | ETA(epoch): 0s + Train loss: 0.862032 (920.9s) ETA: 6016min + Val loss: 0.837793 [t_0.0-0.2=1.0741 t_0.2-0.4=1.0098 t_0.4-0.6=0.8594 t_0.6-0.8=0.6861 t_0.8-1.0=0.6658] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0139 + Deleted old checkpoint: checkpoint_epoch_0136 +[MEM @ epoch 139 end] RAM: 16.9/188.4 GiB (9.0%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 140/499 (28% done) --- + [Epoch 140][10/40] loss=0.968114 avg=0.860574 VRAM=38.9GiB | 28.1% done | ETA(epoch): 690s + [Epoch 140][20/40] loss=0.884229 avg=0.865899 VRAM=38.8GiB | 28.1% done | ETA(epoch): 460s + [Epoch 140][30/40] loss=0.829398 avg=0.852266 VRAM=38.9GiB | 28.1% done | ETA(epoch): 230s + [Epoch 140][40/40] loss=0.804312 avg=0.851013 VRAM=38.8GiB | 28.2% done | ETA(epoch): 0s + Train loss: 0.851013 (919.8s) ETA: 5999min + Val loss: 0.822734 [t_0.0-0.2=1.0701 t_0.2-0.4=0.9916 t_0.4-0.6=0.8372 t_0.6-0.8=0.7316 t_0.8-1.0=0.6517] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0140 (BEST) + Deleted old checkpoint: checkpoint_epoch_0127 + Deleted old checkpoint: checkpoint_epoch_0137 +[MEM @ epoch 140 end] RAM: 17.0/188.4 GiB (9.0%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 141/499 (28% done) --- + [Epoch 141][10/40] loss=0.898359 avg=0.857876 VRAM=38.9GiB | 28.2% done | ETA(epoch): 690s + [Epoch 141][20/40] loss=0.875226 avg=0.874116 VRAM=38.8GiB | 28.3% done | ETA(epoch): 460s + [Epoch 141][30/40] loss=0.860342 avg=0.863137 VRAM=38.9GiB | 28.3% done | ETA(epoch): 230s + [Epoch 141][40/40] loss=0.749349 avg=0.857703 VRAM=38.8GiB | 28.4% done | ETA(epoch): 0s + Train loss: 0.857703 (920.2s) ETA: 5981min + Val loss: 0.846376 [t_0.0-0.2=1.0652 t_0.2-0.4=0.9943 t_0.4-0.6=0.8405 t_0.6-0.8=0.7327 t_0.8-1.0=0.6570] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0141 + Deleted old checkpoint: checkpoint_epoch_0138 +[MEM @ epoch 141 end] RAM: 16.9/188.4 GiB (9.0%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 142/499 (28% done) --- + [Epoch 142][10/40] loss=0.809238 avg=0.829798 VRAM=38.9GiB | 28.4% done | ETA(epoch): 690s + [Epoch 142][20/40] loss=0.831426 avg=0.844045 VRAM=38.8GiB | 28.5% done | ETA(epoch): 460s + [Epoch 142][30/40] loss=0.917015 avg=0.840729 VRAM=38.9GiB | 28.5% done | ETA(epoch): 230s + [Epoch 142][40/40] loss=0.776858 avg=0.842244 VRAM=38.8GiB | 28.6% done | ETA(epoch): 0s + Train loss: 0.842244 (920.1s) ETA: 5964min + Val loss: 0.871943 [t_0.0-0.2=1.0685 t_0.2-0.4=1.0085 t_0.4-0.6=0.8625 t_0.6-0.8=0.7247 t_0.8-1.0=0.6506] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0142 + Deleted old checkpoint: checkpoint_epoch_0139 +[MEM @ epoch 142 end] RAM: 17.0/188.4 GiB (9.0%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 143/499 (29% done) --- + [Epoch 143][10/40] loss=0.865537 avg=0.849353 VRAM=38.9GiB | 28.6% done | ETA(epoch): 690s + [Epoch 143][20/40] loss=0.929324 avg=0.873160 VRAM=38.8GiB | 28.7% done | ETA(epoch): 460s + [Epoch 143][30/40] loss=0.883773 avg=0.869660 VRAM=38.9GiB | 28.7% done | ETA(epoch): 230s + [Epoch 143][40/40] loss=0.769940 avg=0.858486 VRAM=38.8GiB | 28.8% done | ETA(epoch): 0s + Train loss: 0.858486 (919.8s) ETA: 5947min + Val loss: 0.838147 [t_0.0-0.2=1.0768 t_0.2-0.4=1.0083 t_0.4-0.6=0.8406 t_0.6-0.8=0.6992 t_0.8-1.0=0.6457] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0143 +[MEM @ epoch 143 end] RAM: 16.9/188.4 GiB (9.0%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 144/499 (29% done) --- + [Epoch 144][10/40] loss=0.902642 avg=0.875114 VRAM=38.9GiB | 28.8% done | ETA(epoch): 692s + [Epoch 144][20/40] loss=0.859883 avg=0.855147 VRAM=38.8GiB | 28.9% done | ETA(epoch): 461s + [Epoch 144][30/40] loss=0.952662 avg=0.863986 VRAM=38.9GiB | 28.9% done | ETA(epoch): 230s + [Epoch 144][40/40] loss=0.962658 avg=0.861597 VRAM=38.8GiB | 29.0% done | ETA(epoch): 0s + Train loss: 0.861597 (921.4s) ETA: 5929min + Val loss: 0.867222 [t_0.0-0.2=1.0686 t_0.2-0.4=0.9782 t_0.4-0.6=0.8645 t_0.6-0.8=0.7197 t_0.8-1.0=0.6323] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0144 + Deleted old checkpoint: checkpoint_epoch_0141 +[MEM @ epoch 144 end] RAM: 16.8/188.4 GiB (8.9%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 145/499 (29% done) --- + [Epoch 145][10/40] loss=0.857628 avg=0.887722 VRAM=38.9GiB | 29.0% done | ETA(epoch): 690s + [Epoch 145][20/40] loss=0.813065 avg=0.872170 VRAM=38.8GiB | 29.1% done | ETA(epoch): 460s + [Epoch 145][30/40] loss=0.845407 avg=0.868266 VRAM=38.9GiB | 29.1% done | ETA(epoch): 230s + [Epoch 145][40/40] loss=0.908009 avg=0.866880 VRAM=38.8GiB | 29.2% done | ETA(epoch): 0s + Train loss: 0.866880 (920.0s) ETA: 5912min + Val loss: 0.883957 [t_0.0-0.2=1.0849 t_0.2-0.4=0.9733 t_0.4-0.6=0.8173 t_0.6-0.8=0.7509 t_0.8-1.0=0.6443] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0145 + Deleted old checkpoint: checkpoint_epoch_0142 +[MEM @ epoch 145 end] RAM: 16.7/188.4 GiB (8.8%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 146/499 (29% done) --- + [Epoch 146][10/40] loss=0.743092 avg=0.847164 VRAM=38.9GiB | 29.2% done | ETA(epoch): 690s + [Epoch 146][20/40] loss=0.893876 avg=0.853150 VRAM=38.8GiB | 29.3% done | ETA(epoch): 460s + [Epoch 146][30/40] loss=0.877697 avg=0.851434 VRAM=38.9GiB | 29.3% done | ETA(epoch): 230s + [Epoch 146][40/40] loss=0.896857 avg=0.856953 VRAM=38.8GiB | 29.4% done | ETA(epoch): 0s + Train loss: 0.856953 (920.5s) ETA: 5895min + Val loss: 0.883449 [t_0.0-0.2=1.0754 t_0.2-0.4=1.0253 t_0.4-0.6=0.8302 t_0.6-0.8=0.7136 t_0.8-1.0=0.6553] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0146 + Deleted old checkpoint: checkpoint_epoch_0143 +[MEM @ epoch 146 end] RAM: 16.6/188.4 GiB (8.8%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 147/499 (29% done) --- + [Epoch 147][10/40] loss=0.826464 avg=0.852493 VRAM=38.9GiB | 29.4% done | ETA(epoch): 690s + [Epoch 147][20/40] loss=0.917840 avg=0.862480 VRAM=38.8GiB | 29.5% done | ETA(epoch): 460s + [Epoch 147][30/40] loss=0.864641 avg=0.866159 VRAM=38.9GiB | 29.5% done | ETA(epoch): 230s + [Epoch 147][40/40] loss=0.873626 avg=0.864629 VRAM=38.8GiB | 29.6% done | ETA(epoch): 0s + Train loss: 0.864629 (919.9s) ETA: 5877min + Val loss: 0.836287 [t_0.0-0.2=1.0773 t_0.2-0.4=0.9364 t_0.4-0.6=0.8441 t_0.6-0.8=0.7438 t_0.8-1.0=0.6240] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0147 + Deleted old checkpoint: checkpoint_epoch_0144 +[MEM @ epoch 147 end] RAM: 16.9/188.4 GiB (9.0%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 148/499 (30% done) --- + [Epoch 148][10/40] loss=0.879739 avg=0.861545 VRAM=38.9GiB | 29.6% done | ETA(epoch): 690s + [Epoch 148][20/40] loss=0.925681 avg=0.864288 VRAM=38.8GiB | 29.7% done | ETA(epoch): 460s + [Epoch 148][30/40] loss=0.849766 avg=0.853638 VRAM=38.9GiB | 29.8% done | ETA(epoch): 230s + [Epoch 148][40/40] loss=0.886187 avg=0.856828 VRAM=38.8GiB | 29.8% done | ETA(epoch): 0s + Train loss: 0.856828 (920.4s) ETA: 5860min + Val loss: 0.896919 [t_0.0-0.2=1.0835 t_0.2-0.4=0.9875 t_0.4-0.6=0.8954 t_0.6-0.8=0.6914 t_0.8-1.0=0.6416] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0148 + Deleted old checkpoint: checkpoint_epoch_0145 +[MEM @ epoch 148 end] RAM: 16.6/188.4 GiB (8.8%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 149/499 (30% done) --- + [Epoch 149][10/40] loss=0.942954 avg=0.861953 VRAM=38.9GiB | 29.8% done | ETA(epoch): 690s + [Epoch 149][20/40] loss=0.932658 avg=0.866306 VRAM=38.8GiB | 29.9% done | ETA(epoch): 460s + [Epoch 149][30/40] loss=0.873284 avg=0.869121 VRAM=38.9GiB | 29.9% done | ETA(epoch): 230s + [Epoch 149][40/40] loss=0.863542 avg=0.867131 VRAM=38.8GiB | 30.0% done | ETA(epoch): 0s + Train loss: 0.867131 (920.1s) ETA: 5843min + Val loss: 0.825458 [t_0.0-0.2=1.0566 t_0.2-0.4=0.9818 t_0.4-0.6=0.8638 t_0.6-0.8=0.7252 t_0.8-1.0=0.6376] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0149 + Deleted old checkpoint: checkpoint_epoch_0146 +[MEM @ epoch 149 end] RAM: 17.2/188.4 GiB (9.1%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 150/499 (30% done) --- + [MilestoneVis] train_0 step 6000 ✓ + [MilestoneGrid] train_0 → output/lora_rank_128_mlp_H200/milestone_vis/milestone_grid_train_0.png + [MilestoneVis] train_1 step 6000 ✓ + [MilestoneGrid] train_1 → output/lora_rank_128_mlp_H200/milestone_vis/milestone_grid_train_1.png + [MilestoneVis] val_0 step 6000 ✓ + [MilestoneGrid] val_0 → output/lora_rank_128_mlp_H200/milestone_vis/milestone_grid_val_0.png + [MilestoneVis] val_1 step 6000 ✓ + [MilestoneGrid] val_1 → output/lora_rank_128_mlp_H200/milestone_vis/milestone_grid_val_1.png + [MilestoneVis] test_0 step 6000 ✓ + [MilestoneGrid] test_0 → output/lora_rank_128_mlp_H200/milestone_vis/milestone_grid_test_0.png + [MilestoneVis] test_1 step 6000 ✓ + [MilestoneGrid] test_1 → output/lora_rank_128_mlp_H200/milestone_vis/milestone_grid_test_1.png + [MilestoneVis] test_2 step 6000 ✓ + [MilestoneGrid] test_2 → output/lora_rank_128_mlp_H200/milestone_vis/milestone_grid_test_2.png + [MilestoneVis] test_3 step 6000 ✓ + [MilestoneGrid] test_3 → output/lora_rank_128_mlp_H200/milestone_vis/milestone_grid_test_3.png + [MilestoneVis] test_4 step 6000 ✓ + [MilestoneGrid] test_4 → output/lora_rank_128_mlp_H200/milestone_vis/milestone_grid_test_4.png + [MilestoneVis] test_5 step 6000 ✓ + [MilestoneGrid] test_5 → output/lora_rank_128_mlp_H200/milestone_vis/milestone_grid_test_5.png + [Epoch 150][10/40] loss=0.833836 avg=0.876645 VRAM=38.9GiB | 30.0% done | ETA(epoch): 2937s + [Epoch 150][20/40] loss=0.854839 avg=0.856410 VRAM=38.8GiB | 30.1% done | ETA(epoch): 1209s + [Epoch 150][30/40] loss=0.855559 avg=0.858949 VRAM=38.9GiB | 30.1% done | ETA(epoch): 480s + [Epoch 150][40/40] loss=0.912357 avg=0.861414 VRAM=38.8GiB | 30.2% done | ETA(epoch): 0s + Train loss: 0.861414 (1669.5s) ETA: 5854min + Val loss: 0.851277 [t_0.0-0.2=1.0696 t_0.2-0.4=0.9635 t_0.4-0.6=0.8570 t_0.6-0.8=0.7243 t_0.8-1.0=0.6482] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0150 + Deleted old checkpoint: checkpoint_epoch_0147 +[MEM @ epoch 150 end] RAM: 17.7/188.4 GiB (9.4%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 151/499 (30% done) --- + [Epoch 151][10/40] loss=0.895135 avg=0.873933 VRAM=38.9GiB | 30.2% done | ETA(epoch): 689s + [Epoch 151][20/40] loss=0.813060 avg=0.867929 VRAM=38.8GiB | 30.3% done | ETA(epoch): 460s + [Epoch 151][30/40] loss=0.966966 avg=0.864235 VRAM=38.9GiB | 30.3% done | ETA(epoch): 230s + [Epoch 151][40/40] loss=0.938969 avg=0.866795 VRAM=38.8GiB | 30.4% done | ETA(epoch): 0s + Train loss: 0.866795 (919.5s) ETA: 5837min + Val loss: 0.834632 [t_0.0-0.2=1.0946 t_0.2-0.4=0.9929 t_0.4-0.6=0.8190 t_0.6-0.8=0.6969 t_0.8-1.0=0.6506] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0151 + Deleted old checkpoint: checkpoint_epoch_0148 +[MEM @ epoch 151 end] RAM: 17.8/188.4 GiB (9.4%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 152/499 (30% done) --- + [Epoch 152][10/40] loss=0.851418 avg=0.855721 VRAM=38.9GiB | 30.4% done | ETA(epoch): 690s + [Epoch 152][20/40] loss=0.790739 avg=0.856723 VRAM=38.8GiB | 30.5% done | ETA(epoch): 460s + [Epoch 152][30/40] loss=0.816035 avg=0.845824 VRAM=38.9GiB | 30.6% done | ETA(epoch): 230s + [Epoch 152][40/40] loss=0.828727 avg=0.845544 VRAM=38.8GiB | 30.6% done | ETA(epoch): 0s + Train loss: 0.845544 (919.9s) ETA: 5819min + Val loss: 0.869322 [t_0.0-0.2=1.0786 t_0.2-0.4=0.9759 t_0.4-0.6=0.8516 t_0.6-0.8=0.7395 t_0.8-1.0=0.6384] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0152 + Deleted old checkpoint: checkpoint_epoch_0149 +[MEM @ epoch 152 end] RAM: 18.3/188.4 GiB (9.7%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 153/499 (31% done) --- + [Epoch 153][10/40] loss=0.823753 avg=0.852691 VRAM=38.9GiB | 30.6% done | ETA(epoch): 690s + [Epoch 153][20/40] loss=0.769272 avg=0.841905 VRAM=38.8GiB | 30.7% done | ETA(epoch): 460s + [Epoch 153][30/40] loss=0.749687 avg=0.849707 VRAM=38.9GiB | 30.8% done | ETA(epoch): 230s + [Epoch 153][40/40] loss=0.881358 avg=0.845023 VRAM=38.8GiB | 30.8% done | ETA(epoch): 0s + Train loss: 0.845023 (919.8s) ETA: 5801min + Val loss: 0.827282 [t_0.0-0.2=1.0580 t_0.2-0.4=1.0078 t_0.4-0.6=0.8626 t_0.6-0.8=0.7132 t_0.8-1.0=0.6337] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0153 + Deleted old checkpoint: checkpoint_epoch_0150 +[MEM @ epoch 153 end] RAM: 18.1/188.4 GiB (9.6%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 154/499 (31% done) --- + [Epoch 154][10/40] loss=0.878109 avg=0.868633 VRAM=38.9GiB | 30.9% done | ETA(epoch): 690s + [Epoch 154][20/40] loss=0.981508 avg=0.867230 VRAM=38.8GiB | 30.9% done | ETA(epoch): 460s + [Epoch 154][30/40] loss=0.897189 avg=0.860799 VRAM=38.9GiB | 30.9% done | ETA(epoch): 230s + [Epoch 154][40/40] loss=0.730167 avg=0.860892 VRAM=38.8GiB | 31.0% done | ETA(epoch): 0s + Train loss: 0.860892 (920.2s) ETA: 5784min + Val loss: 0.874445 [t_0.0-0.2=1.0820 t_0.2-0.4=1.0340 t_0.4-0.6=0.8095 t_0.6-0.8=0.7155 t_0.8-1.0=0.6321] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0154 + Deleted old checkpoint: checkpoint_epoch_0151 +[MEM @ epoch 154 end] RAM: 18.2/188.4 GiB (9.6%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 155/499 (31% done) --- + [Epoch 155][10/40] loss=0.923109 avg=0.867269 VRAM=38.9GiB | 31.1% done | ETA(epoch): 690s + [Epoch 155][20/40] loss=0.826052 avg=0.877588 VRAM=38.8GiB | 31.1% done | ETA(epoch): 460s + [Epoch 155][30/40] loss=0.780332 avg=0.864315 VRAM=38.9GiB | 31.1% done | ETA(epoch): 230s + [Epoch 155][40/40] loss=0.761433 avg=0.857061 VRAM=38.8GiB | 31.2% done | ETA(epoch): 0s + Train loss: 0.857061 (920.3s) ETA: 5766min + Val loss: 0.878631 [t_0.0-0.2=1.0515 t_0.2-0.4=1.0393 t_0.4-0.6=0.8710 t_0.6-0.8=0.7046 t_0.8-1.0=0.6513] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0155 + Deleted old checkpoint: checkpoint_epoch_0152 +[MEM @ epoch 155 end] RAM: 18.1/188.4 GiB (9.6%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 156/499 (31% done) --- + [Epoch 156][10/40] loss=0.888181 avg=0.847289 VRAM=38.9GiB | 31.2% done | ETA(epoch): 690s + [Epoch 156][20/40] loss=0.835793 avg=0.837340 VRAM=38.8GiB | 31.3% done | ETA(epoch): 460s + [Epoch 156][30/40] loss=0.899444 avg=0.837137 VRAM=38.9GiB | 31.4% done | ETA(epoch): 230s + [Epoch 156][40/40] loss=0.774805 avg=0.835558 VRAM=38.8GiB | 31.4% done | ETA(epoch): 0s + Train loss: 0.835558 (920.7s) ETA: 5749min + Val loss: 0.832316 [t_0.0-0.2=1.0730 t_0.2-0.4=1.0092 t_0.4-0.6=0.8318 t_0.6-0.8=0.7370 t_0.8-1.0=0.6338] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0156 + Deleted old checkpoint: checkpoint_epoch_0153 +[MEM @ epoch 156 end] RAM: 18.1/188.4 GiB (9.6%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 157/499 (31% done) --- + [Epoch 157][10/40] loss=0.798442 avg=0.856294 VRAM=38.9GiB | 31.4% done | ETA(epoch): 690s + [Epoch 157][20/40] loss=0.783452 avg=0.839727 VRAM=38.8GiB | 31.5% done | ETA(epoch): 460s + [Epoch 157][30/40] loss=0.892498 avg=0.854929 VRAM=38.9GiB | 31.6% done | ETA(epoch): 230s + [Epoch 157][40/40] loss=0.731825 avg=0.859211 VRAM=38.8GiB | 31.6% done | ETA(epoch): 0s + Train loss: 0.859211 (920.9s) ETA: 5732min + Val loss: 0.850834 [t_0.0-0.2=1.0591 t_0.2-0.4=0.9657 t_0.4-0.6=0.8764 t_0.6-0.8=0.7351 t_0.8-1.0=0.6473] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0157 + Deleted old checkpoint: checkpoint_epoch_0154 +[MEM @ epoch 157 end] RAM: 18.1/188.4 GiB (9.6%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 158/499 (32% done) --- + [Epoch 158][10/40] loss=0.823702 avg=0.863691 VRAM=38.9GiB | 31.6% done | ETA(epoch): 691s + [Epoch 158][20/40] loss=0.781718 avg=0.856282 VRAM=38.8GiB | 31.7% done | ETA(epoch): 460s + [Epoch 158][30/40] loss=0.714646 avg=0.859375 VRAM=38.9GiB | 31.8% done | ETA(epoch): 230s + [Epoch 158][40/40] loss=1.023150 avg=0.861314 VRAM=38.8GiB | 31.8% done | ETA(epoch): 0s + Train loss: 0.861314 (920.5s) ETA: 5714min + Val loss: 0.861762 [t_0.0-0.2=1.0578 t_0.2-0.4=1.0078 t_0.4-0.6=0.8465 t_0.6-0.8=0.7193 t_0.8-1.0=0.6458] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0158 + Deleted old checkpoint: checkpoint_epoch_0155 +[MEM @ epoch 158 end] RAM: 18.1/188.4 GiB (9.6%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 159/499 (32% done) --- + [Epoch 159][10/40] loss=0.721874 avg=0.858744 VRAM=38.9GiB | 31.9% done | ETA(epoch): 691s + [Epoch 159][20/40] loss=0.857888 avg=0.857539 VRAM=38.8GiB | 31.9% done | ETA(epoch): 461s + [Epoch 159][30/40] loss=0.800597 avg=0.862119 VRAM=38.9GiB | 31.9% done | ETA(epoch): 230s + [Epoch 159][40/40] loss=0.865639 avg=0.859523 VRAM=38.8GiB | 32.0% done | ETA(epoch): 0s + Train loss: 0.859523 (921.0s) ETA: 5697min + Val loss: 0.865414 [t_0.0-0.2=1.0701 t_0.2-0.4=0.9916 t_0.4-0.6=0.8570 t_0.6-0.8=0.7383 t_0.8-1.0=0.6444] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0159 + Deleted old checkpoint: checkpoint_epoch_0156 +[MEM @ epoch 159 end] RAM: 18.1/188.4 GiB (9.6%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 160/499 (32% done) --- + [Epoch 160][10/40] loss=0.841157 avg=0.862319 VRAM=38.9GiB | 32.0% done | ETA(epoch): 690s + [Epoch 160][20/40] loss=0.824240 avg=0.854759 VRAM=38.8GiB | 32.1% done | ETA(epoch): 460s + [Epoch 160][30/40] loss=0.981352 avg=0.863218 VRAM=38.9GiB | 32.1% done | ETA(epoch): 230s + [Epoch 160][40/40] loss=0.757561 avg=0.864050 VRAM=38.8GiB | 32.2% done | ETA(epoch): 0s + Train loss: 0.864050 (920.2s) ETA: 5679min + Val loss: 0.882074 [t_0.0-0.2=1.0743 t_0.2-0.4=1.0169 t_0.4-0.6=0.8452 t_0.6-0.8=0.7092 t_0.8-1.0=0.6506] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0160 + Deleted old checkpoint: checkpoint_epoch_0157 +[MEM @ epoch 160 end] RAM: 18.1/188.4 GiB (9.6%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 161/499 (32% done) --- + [Epoch 161][10/40] loss=0.728348 avg=0.823232 VRAM=38.9GiB | 32.2% done | ETA(epoch): 690s + [Epoch 161][20/40] loss=0.932315 avg=0.853450 VRAM=38.8GiB | 32.3% done | ETA(epoch): 460s + [Epoch 161][30/40] loss=0.868971 avg=0.861537 VRAM=38.9GiB | 32.4% done | ETA(epoch): 230s + [Epoch 161][40/40] loss=0.806302 avg=0.853795 VRAM=38.8GiB | 32.4% done | ETA(epoch): 0s + Train loss: 0.853795 (920.8s) ETA: 5662min + Val loss: 0.862296 [t_0.0-0.2=1.0868 t_0.2-0.4=1.0014 t_0.4-0.6=0.8115 t_0.6-0.8=0.7193 t_0.8-1.0=0.6401] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0161 + Deleted old checkpoint: checkpoint_epoch_0158 +[MEM @ epoch 161 end] RAM: 18.1/188.4 GiB (9.6%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 162/499 (32% done) --- + [Epoch 162][10/40] loss=0.821822 avg=0.857757 VRAM=38.9GiB | 32.5% done | ETA(epoch): 690s + [Epoch 162][20/40] loss=0.884844 avg=0.853173 VRAM=38.8GiB | 32.5% done | ETA(epoch): 460s + [Epoch 162][30/40] loss=0.808251 avg=0.853473 VRAM=38.9GiB | 32.6% done | ETA(epoch): 230s + [Epoch 162][40/40] loss=0.786822 avg=0.851690 VRAM=38.8GiB | 32.6% done | ETA(epoch): 0s + Train loss: 0.851690 (920.3s) ETA: 5644min + Val loss: 0.853939 [t_0.0-0.2=1.0748 t_0.2-0.4=0.9971 t_0.4-0.6=0.8748 t_0.6-0.8=0.7050 t_0.8-1.0=0.6432] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0162 + Deleted old checkpoint: checkpoint_epoch_0159 +[MEM @ epoch 162 end] RAM: 18.2/188.4 GiB (9.7%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 163/499 (33% done) --- + [Epoch 163][10/40] loss=0.918751 avg=0.849688 VRAM=38.9GiB | 32.6% done | ETA(epoch): 691s + [Epoch 163][20/40] loss=0.962132 avg=0.845293 VRAM=38.8GiB | 32.7% done | ETA(epoch): 460s + [Epoch 163][30/40] loss=0.808698 avg=0.853954 VRAM=38.9GiB | 32.8% done | ETA(epoch): 230s + [Epoch 163][40/40] loss=0.888470 avg=0.856703 VRAM=38.8GiB | 32.8% done | ETA(epoch): 0s + Train loss: 0.856703 (920.6s) ETA: 5627min + Val loss: 0.869431 [t_0.0-0.2=1.0704 t_0.2-0.4=0.9823 t_0.4-0.6=0.8546 t_0.6-0.8=0.7342 t_0.8-1.0=0.6259] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0163 + Deleted old checkpoint: checkpoint_epoch_0160 +[MEM @ epoch 163 end] RAM: 18.1/188.4 GiB (9.6%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 164/499 (33% done) --- + [Epoch 164][10/40] loss=0.818912 avg=0.847486 VRAM=38.9GiB | 32.9% done | ETA(epoch): 690s + [Epoch 164][20/40] loss=0.812773 avg=0.852289 VRAM=38.8GiB | 32.9% done | ETA(epoch): 460s + [Epoch 164][30/40] loss=0.806942 avg=0.858300 VRAM=38.9GiB | 33.0% done | ETA(epoch): 230s + [Epoch 164][40/40] loss=0.856354 avg=0.858209 VRAM=38.8GiB | 33.0% done | ETA(epoch): 0s + Train loss: 0.858209 (920.9s) ETA: 5610min + Val loss: 0.842874 [t_0.0-0.2=1.0738 t_0.2-0.4=0.9854 t_0.4-0.6=0.8467 t_0.6-0.8=0.6970 t_0.8-1.0=0.6450] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0164 + Deleted old checkpoint: checkpoint_epoch_0161 +[MEM @ epoch 164 end] RAM: 18.1/188.4 GiB (9.6%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 165/499 (33% done) --- + [Epoch 165][10/40] loss=0.868344 avg=0.870505 VRAM=38.9GiB | 33.1% done | ETA(epoch): 691s + [Epoch 165][20/40] loss=0.858832 avg=0.895449 VRAM=38.8GiB | 33.1% done | ETA(epoch): 461s + [Epoch 165][30/40] loss=0.810150 avg=0.878415 VRAM=38.9GiB | 33.1% done | ETA(epoch): 230s + [Epoch 165][40/40] loss=0.905745 avg=0.881508 VRAM=38.8GiB | 33.2% done | ETA(epoch): 0s + Train loss: 0.881508 (921.0s) ETA: 5592min + Val loss: 0.869758 [t_0.0-0.2=1.0528 t_0.2-0.4=0.9987 t_0.4-0.6=0.8505 t_0.6-0.8=0.7061 t_0.8-1.0=0.6504] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0165 + Deleted old checkpoint: checkpoint_epoch_0162 +[MEM @ epoch 165 end] RAM: 18.4/188.4 GiB (9.7%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 166/499 (33% done) --- + [Epoch 166][10/40] loss=0.840723 avg=0.858081 VRAM=38.9GiB | 33.2% done | ETA(epoch): 690s + [Epoch 166][20/40] loss=0.795897 avg=0.852394 VRAM=38.8GiB | 33.3% done | ETA(epoch): 460s + [Epoch 166][30/40] loss=0.741659 avg=0.847029 VRAM=38.9GiB | 33.4% done | ETA(epoch): 230s + [Epoch 166][40/40] loss=0.859409 avg=0.847690 VRAM=38.8GiB | 33.4% done | ETA(epoch): 0s + Train loss: 0.847690 (921.1s) ETA: 5575min + Val loss: 0.823523 [t_0.0-0.2=1.0726 t_0.2-0.4=1.0043 t_0.4-0.6=0.8571 t_0.6-0.8=0.7207 t_0.8-1.0=0.6351] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0166 + Deleted old checkpoint: checkpoint_epoch_0163 +[MEM @ epoch 166 end] RAM: 17.6/188.4 GiB (9.3%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 167/499 (33% done) --- + [Epoch 167][10/40] loss=0.897423 avg=0.867285 VRAM=38.9GiB | 33.5% done | ETA(epoch): 691s + [Epoch 167][20/40] loss=0.870743 avg=0.873462 VRAM=38.8GiB | 33.5% done | ETA(epoch): 461s + [Epoch 167][30/40] loss=0.762251 avg=0.854551 VRAM=38.9GiB | 33.6% done | ETA(epoch): 230s + [Epoch 167][40/40] loss=0.858764 avg=0.863722 VRAM=38.8GiB | 33.6% done | ETA(epoch): 0s + Train loss: 0.863722 (921.4s) ETA: 5558min + Val loss: 0.874686 [t_0.0-0.2=1.0766 t_0.2-0.4=1.0140 t_0.4-0.6=0.8684 t_0.6-0.8=0.7316 t_0.8-1.0=0.6407] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0167 + Deleted old checkpoint: checkpoint_epoch_0164 +[MEM @ epoch 167 end] RAM: 17.5/188.4 GiB (9.3%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 168/499 (34% done) --- + [Epoch 168][10/40] loss=0.898079 avg=0.873013 VRAM=38.9GiB | 33.7% done | ETA(epoch): 691s + [Epoch 168][20/40] loss=0.948985 avg=0.876250 VRAM=38.8GiB | 33.7% done | ETA(epoch): 461s + [Epoch 168][30/40] loss=0.791418 avg=0.862964 VRAM=38.9GiB | 33.8% done | ETA(epoch): 230s + [Epoch 168][40/40] loss=0.892410 avg=0.861908 VRAM=38.8GiB | 33.8% done | ETA(epoch): 0s + Train loss: 0.861908 (921.4s) ETA: 5540min + Val loss: 0.856790 [t_0.0-0.2=1.0575 t_0.2-0.4=1.0092 t_0.4-0.6=0.8211 t_0.6-0.8=0.7000 t_0.8-1.0=0.6487] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0168 + Deleted old checkpoint: checkpoint_epoch_0165 +[MEM @ epoch 168 end] RAM: 17.5/188.4 GiB (9.3%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 169/499 (34% done) --- + [Epoch 169][10/40] loss=0.784151 avg=0.848485 VRAM=38.9GiB | 33.9% done | ETA(epoch): 690s + [Epoch 169][20/40] loss=0.944585 avg=0.866044 VRAM=38.8GiB | 33.9% done | ETA(epoch): 460s + [Epoch 169][30/40] loss=0.829562 avg=0.859096 VRAM=38.9GiB | 34.0% done | ETA(epoch): 230s + [Epoch 169][40/40] loss=0.887312 avg=0.862903 VRAM=38.8GiB | 34.0% done | ETA(epoch): 0s + Train loss: 0.862903 (920.4s) ETA: 5523min + Val loss: 0.852786 [t_0.0-0.2=1.0758 t_0.2-0.4=1.0092 t_0.4-0.6=0.8463 t_0.6-0.8=0.7079 t_0.8-1.0=0.6489] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0169 + Deleted old checkpoint: checkpoint_epoch_0166 +[MEM @ epoch 169 end] RAM: 17.6/188.4 GiB (9.3%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 170/499 (34% done) --- + [Epoch 170][10/40] loss=0.749541 avg=0.800168 VRAM=38.9GiB | 34.1% done | ETA(epoch): 690s + [Epoch 170][20/40] loss=0.752995 avg=0.825900 VRAM=38.8GiB | 34.1% done | ETA(epoch): 460s + [Epoch 170][30/40] loss=0.838806 avg=0.832031 VRAM=38.9GiB | 34.2% done | ETA(epoch): 230s + [Epoch 170][40/40] loss=0.823803 avg=0.836792 VRAM=38.8GiB | 34.2% done | ETA(epoch): 0s + Train loss: 0.836792 (920.4s) ETA: 5506min + Val loss: 0.843757 [t_0.0-0.2=1.0736 t_0.2-0.4=0.9874 t_0.4-0.6=0.8737 t_0.6-0.8=0.7139 t_0.8-1.0=0.6321] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0170 + Deleted old checkpoint: checkpoint_epoch_0167 +[MEM @ epoch 170 end] RAM: 17.5/188.4 GiB (9.3%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 171/499 (34% done) --- + [Epoch 171][10/40] loss=0.804709 avg=0.841979 VRAM=38.9GiB | 34.2% done | ETA(epoch): 691s + [Epoch 171][20/40] loss=0.756725 avg=0.844397 VRAM=38.8GiB | 34.3% done | ETA(epoch): 461s + [Epoch 171][30/40] loss=0.798418 avg=0.844330 VRAM=38.9GiB | 34.4% done | ETA(epoch): 230s + [Epoch 171][40/40] loss=0.882858 avg=0.844861 VRAM=38.8GiB | 34.4% done | ETA(epoch): 0s + Train loss: 0.844861 (921.4s) ETA: 5489min + Val loss: 0.836857 [t_0.0-0.2=1.0841 t_0.2-0.4=1.0081 t_0.4-0.6=0.8252 t_0.6-0.8=0.6782 t_0.8-1.0=0.6519] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0171 + Deleted old checkpoint: checkpoint_epoch_0168 +[MEM @ epoch 171 end] RAM: 17.5/188.4 GiB (9.3%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 172/499 (34% done) --- + [Epoch 172][10/40] loss=0.913180 avg=0.855655 VRAM=38.9GiB | 34.4% done | ETA(epoch): 691s + [Epoch 172][20/40] loss=0.774868 avg=0.867192 VRAM=38.8GiB | 34.5% done | ETA(epoch): 461s + [Epoch 172][30/40] loss=0.748241 avg=0.856535 VRAM=38.9GiB | 34.5% done | ETA(epoch): 230s + [Epoch 172][40/40] loss=0.748498 avg=0.852766 VRAM=38.8GiB | 34.6% done | ETA(epoch): 0s + Train loss: 0.852766 (921.3s) ETA: 5471min + Val loss: 0.847220 [t_0.0-0.2=1.0746 t_0.2-0.4=0.9935 t_0.4-0.6=0.8348 t_0.6-0.8=0.7248 t_0.8-1.0=0.6447] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0172 + Deleted old checkpoint: checkpoint_epoch_0169 +[MEM @ epoch 172 end] RAM: 17.5/188.4 GiB (9.3%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 173/499 (35% done) --- + [Epoch 173][10/40] loss=0.860506 avg=0.893738 VRAM=38.9GiB | 34.6% done | ETA(epoch): 690s + [Epoch 173][20/40] loss=0.779967 avg=0.881382 VRAM=38.8GiB | 34.7% done | ETA(epoch): 460s + [Epoch 173][30/40] loss=0.819862 avg=0.871301 VRAM=38.9GiB | 34.8% done | ETA(epoch): 230s + [Epoch 173][40/40] loss=0.923970 avg=0.872015 VRAM=38.8GiB | 34.8% done | ETA(epoch): 0s + Train loss: 0.872015 (920.9s) ETA: 5454min + Val loss: 0.854530 [t_0.0-0.2=1.0588 t_0.2-0.4=0.9738 t_0.4-0.6=0.8637 t_0.6-0.8=0.7275 t_0.8-1.0=0.6314] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0173 + Deleted old checkpoint: checkpoint_epoch_0170 +[MEM @ epoch 173 end] RAM: 17.6/188.4 GiB (9.3%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 174/499 (35% done) --- + [Epoch 174][10/40] loss=0.858252 avg=0.814730 VRAM=38.9GiB | 34.8% done | ETA(epoch): 690s + [Epoch 174][20/40] loss=0.778693 avg=0.828418 VRAM=38.8GiB | 34.9% done | ETA(epoch): 460s + [Epoch 174][30/40] loss=0.935426 avg=0.840262 VRAM=38.9GiB | 34.9% done | ETA(epoch): 230s + [Epoch 174][40/40] loss=0.782711 avg=0.842741 VRAM=38.8GiB | 35.0% done | ETA(epoch): 0s + Train loss: 0.842741 (920.9s) ETA: 5437min + Val loss: 0.854473 [t_0.0-0.2=1.0677 t_0.2-0.4=1.0083 t_0.4-0.6=0.8468 t_0.6-0.8=0.7023 t_0.8-1.0=0.6350] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0174 + Deleted old checkpoint: checkpoint_epoch_0171 +[MEM @ epoch 174 end] RAM: 17.5/188.4 GiB (9.3%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 175/499 (35% done) --- + [Epoch 175][10/40] loss=0.740740 avg=0.865901 VRAM=38.9GiB | 35.0% done | ETA(epoch): 690s + [Epoch 175][20/40] loss=0.791600 avg=0.842184 VRAM=38.8GiB | 35.1% done | ETA(epoch): 460s + [Epoch 175][30/40] loss=0.844422 avg=0.849194 VRAM=38.9GiB | 35.1% done | ETA(epoch): 230s + [Epoch 175][40/40] loss=0.870651 avg=0.852788 VRAM=38.8GiB | 35.2% done | ETA(epoch): 0s + Train loss: 0.852788 (921.0s) ETA: 5419min + Val loss: 0.865409 [t_0.0-0.2=1.0730 t_0.2-0.4=1.0077 t_0.4-0.6=0.8504 t_0.6-0.8=0.7162 t_0.8-1.0=0.6433] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0175 + Deleted old checkpoint: checkpoint_epoch_0172 +[MEM @ epoch 175 end] RAM: 17.5/188.4 GiB (9.3%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 176/499 (35% done) --- + [Epoch 176][10/40] loss=0.891552 avg=0.837476 VRAM=38.9GiB | 35.2% done | ETA(epoch): 691s + [Epoch 176][20/40] loss=0.836778 avg=0.854717 VRAM=38.8GiB | 35.3% done | ETA(epoch): 461s + [Epoch 176][30/40] loss=0.798367 avg=0.857739 VRAM=38.9GiB | 35.4% done | ETA(epoch): 230s + [Epoch 176][40/40] loss=0.812283 avg=0.863265 VRAM=38.8GiB | 35.4% done | ETA(epoch): 0s + Train loss: 0.863265 (921.1s) ETA: 5402min + Val loss: 0.888365 [t_0.0-0.2=1.0805 t_0.2-0.4=1.0430 t_0.4-0.6=0.8426 t_0.6-0.8=0.7217 t_0.8-1.0=0.6452] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0176 + Deleted old checkpoint: checkpoint_epoch_0173 +[MEM @ epoch 176 end] RAM: 17.6/188.4 GiB (9.3%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 177/499 (35% done) --- + [Epoch 177][10/40] loss=0.963308 avg=0.871962 VRAM=38.9GiB | 35.4% done | ETA(epoch): 691s + [Epoch 177][20/40] loss=0.899563 avg=0.863159 VRAM=38.8GiB | 35.5% done | ETA(epoch): 460s + [Epoch 177][30/40] loss=0.845590 avg=0.866415 VRAM=38.9GiB | 35.5% done | ETA(epoch): 230s + [Epoch 177][40/40] loss=0.869041 avg=0.871868 VRAM=38.8GiB | 35.6% done | ETA(epoch): 0s + Train loss: 0.871868 (920.8s) ETA: 5385min + Val loss: 0.826658 [t_0.0-0.2=1.0749 t_0.2-0.4=0.9979 t_0.4-0.6=0.8467 t_0.6-0.8=0.6966 t_0.8-1.0=0.6613] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0177 + Deleted old checkpoint: checkpoint_epoch_0174 +[MEM @ epoch 177 end] RAM: 17.6/188.4 GiB (9.3%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 178/499 (36% done) --- + [Epoch 178][10/40] loss=0.867223 avg=0.858204 VRAM=38.9GiB | 35.6% done | ETA(epoch): 691s + [Epoch 178][20/40] loss=0.829441 avg=0.857598 VRAM=38.8GiB | 35.7% done | ETA(epoch): 460s + [Epoch 178][30/40] loss=0.918840 avg=0.855936 VRAM=38.9GiB | 35.8% done | ETA(epoch): 230s + [Epoch 178][40/40] loss=0.809936 avg=0.855768 VRAM=38.8GiB | 35.8% done | ETA(epoch): 0s + Train loss: 0.855768 (921.0s) ETA: 5368min + Val loss: 0.876902 [t_0.0-0.2=1.0734 t_0.2-0.4=1.0044 t_0.4-0.6=0.8650 t_0.6-0.8=0.7227 t_0.8-1.0=0.6379] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0178 + Deleted old checkpoint: checkpoint_epoch_0175 +[MEM @ epoch 178 end] RAM: 17.6/188.4 GiB (9.3%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 179/499 (36% done) --- + [Epoch 179][10/40] loss=0.795055 avg=0.871773 VRAM=38.9GiB | 35.9% done | ETA(epoch): 690s + [Epoch 179][20/40] loss=0.860348 avg=0.869500 VRAM=38.8GiB | 35.9% done | ETA(epoch): 460s + [Epoch 179][30/40] loss=0.815854 avg=0.859806 VRAM=38.9GiB | 35.9% done | ETA(epoch): 230s + [Epoch 179][40/40] loss=0.801621 avg=0.851464 VRAM=38.8GiB | 36.0% done | ETA(epoch): 0s + Train loss: 0.851464 (920.4s) ETA: 5351min + Val loss: 0.831757 [t_0.0-0.2=1.0809 t_0.2-0.4=1.0053 t_0.4-0.6=0.8361 t_0.6-0.8=0.7032 t_0.8-1.0=0.6414] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0179 + Deleted old checkpoint: checkpoint_epoch_0176 +[MEM @ epoch 179 end] RAM: 17.6/188.4 GiB (9.3%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 180/499 (36% done) --- + [Epoch 180][10/40] loss=0.808207 avg=0.862802 VRAM=38.9GiB | 36.0% done | ETA(epoch): 690s + [Epoch 180][20/40] loss=0.927527 avg=0.855438 VRAM=38.8GiB | 36.1% done | ETA(epoch): 460s + [Epoch 180][30/40] loss=0.820695 avg=0.861044 VRAM=38.9GiB | 36.1% done | ETA(epoch): 230s + [Epoch 180][40/40] loss=0.929754 avg=0.861276 VRAM=38.8GiB | 36.2% done | ETA(epoch): 0s + Train loss: 0.861276 (920.7s) ETA: 5333min + Val loss: 0.826884 [t_0.0-0.2=1.0630 t_0.2-0.4=1.0146 t_0.4-0.6=0.8201 t_0.6-0.8=0.7259 t_0.8-1.0=0.6318] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0180 + Deleted old checkpoint: checkpoint_epoch_0177 +[MEM @ epoch 180 end] RAM: 17.6/188.4 GiB (9.3%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 181/499 (36% done) --- + [Epoch 181][10/40] loss=0.911949 avg=0.879922 VRAM=38.9GiB | 36.2% done | ETA(epoch): 690s + [Epoch 181][20/40] loss=0.828137 avg=0.877588 VRAM=38.8GiB | 36.3% done | ETA(epoch): 460s + [Epoch 181][30/40] loss=0.762369 avg=0.869183 VRAM=38.9GiB | 36.4% done | ETA(epoch): 230s + [Epoch 181][40/40] loss=1.012311 avg=0.876843 VRAM=38.8GiB | 36.4% done | ETA(epoch): 0s + Train loss: 0.876843 (920.3s) ETA: 5316min + Val loss: 0.823028 [t_0.0-0.2=1.0704 t_0.2-0.4=1.0028 t_0.4-0.6=0.8420 t_0.6-0.8=0.7177 t_0.8-1.0=0.6499] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0181 + Deleted old checkpoint: checkpoint_epoch_0178 +[MEM @ epoch 181 end] RAM: 17.6/188.4 GiB (9.3%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 182/499 (36% done) --- + [Epoch 182][10/40] loss=0.767994 avg=0.844605 VRAM=38.9GiB | 36.4% done | ETA(epoch): 690s + [Epoch 182][20/40] loss=0.873609 avg=0.870224 VRAM=38.8GiB | 36.5% done | ETA(epoch): 460s + [Epoch 182][30/40] loss=0.915921 avg=0.863663 VRAM=38.9GiB | 36.5% done | ETA(epoch): 230s + [Epoch 182][40/40] loss=0.922517 avg=0.864704 VRAM=38.8GiB | 36.6% done | ETA(epoch): 0s + Train loss: 0.864704 (920.0s) ETA: 5299min + Val loss: 0.859084 [t_0.0-0.2=1.0591 t_0.2-0.4=1.0227 t_0.4-0.6=0.8555 t_0.6-0.8=0.7229 t_0.8-1.0=0.6434] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0182 + Deleted old checkpoint: checkpoint_epoch_0179 +[MEM @ epoch 182 end] RAM: 17.6/188.4 GiB (9.3%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 183/499 (37% done) --- + [Epoch 183][10/40] loss=0.790977 avg=0.854953 VRAM=38.9GiB | 36.6% done | ETA(epoch): 690s + [Epoch 183][20/40] loss=0.785709 avg=0.855072 VRAM=38.8GiB | 36.7% done | ETA(epoch): 460s + [Epoch 183][30/40] loss=0.859495 avg=0.859102 VRAM=38.9GiB | 36.8% done | ETA(epoch): 230s + [Epoch 183][40/40] loss=0.807877 avg=0.850384 VRAM=38.8GiB | 36.8% done | ETA(epoch): 0s + Train loss: 0.850384 (920.7s) ETA: 5282min + Val loss: 0.843891 [t_0.0-0.2=1.0714 t_0.2-0.4=1.0530 t_0.4-0.6=0.8706 t_0.6-0.8=0.6993 t_0.8-1.0=0.6411] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0183 + Deleted old checkpoint: checkpoint_epoch_0180 +[MEM @ epoch 183 end] RAM: 17.7/188.4 GiB (9.4%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 184/499 (37% done) --- + [Epoch 184][10/40] loss=0.917706 avg=0.879184 VRAM=38.9GiB | 36.9% done | ETA(epoch): 690s + [Epoch 184][20/40] loss=0.853995 avg=0.853416 VRAM=38.8GiB | 36.9% done | ETA(epoch): 460s + [Epoch 184][30/40] loss=0.798891 avg=0.849237 VRAM=38.9GiB | 37.0% done | ETA(epoch): 230s + [Epoch 184][40/40] loss=0.729725 avg=0.848052 VRAM=38.8GiB | 37.0% done | ETA(epoch): 0s + Train loss: 0.848052 (920.0s) ETA: 5265min + Val loss: 0.839084 [t_0.0-0.2=1.0791 t_0.2-0.4=0.9896 t_0.4-0.6=0.8218 t_0.6-0.8=0.7205 t_0.8-1.0=0.6436] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0184 + Deleted old checkpoint: checkpoint_epoch_0181 +[MEM @ epoch 184 end] RAM: 17.5/188.4 GiB (9.3%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 185/499 (37% done) --- + [Epoch 185][10/40] loss=0.788944 avg=0.842771 VRAM=38.9GiB | 37.0% done | ETA(epoch): 690s + [Epoch 185][20/40] loss=0.893848 avg=0.861805 VRAM=38.8GiB | 37.1% done | ETA(epoch): 460s + [Epoch 185][30/40] loss=0.873109 avg=0.867864 VRAM=38.9GiB | 37.1% done | ETA(epoch): 230s + [Epoch 185][40/40] loss=0.856519 avg=0.866286 VRAM=38.8GiB | 37.2% done | ETA(epoch): 0s + Train loss: 0.866286 (920.9s) ETA: 5247min + Val loss: 0.834461 [t_0.0-0.2=1.0661 t_0.2-0.4=1.0242 t_0.4-0.6=0.8341 t_0.6-0.8=0.7159 t_0.8-1.0=0.6594] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0185 + Deleted old checkpoint: checkpoint_epoch_0182 +[MEM @ epoch 185 end] RAM: 17.6/188.4 GiB (9.3%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 186/499 (37% done) --- + [Epoch 186][10/40] loss=0.810919 avg=0.824845 VRAM=38.9GiB | 37.2% done | ETA(epoch): 691s + [Epoch 186][20/40] loss=0.967059 avg=0.843645 VRAM=38.8GiB | 37.3% done | ETA(epoch): 461s + [Epoch 186][30/40] loss=0.773626 avg=0.853672 VRAM=38.9GiB | 37.4% done | ETA(epoch): 230s + [Epoch 186][40/40] loss=0.809931 avg=0.856997 VRAM=38.8GiB | 37.4% done | ETA(epoch): 0s + Train loss: 0.856997 (921.0s) ETA: 5230min + Val loss: 0.854784 [t_0.0-0.2=1.0896 t_0.2-0.4=1.0010 t_0.4-0.6=0.8203 t_0.6-0.8=0.6796 t_0.8-1.0=0.6351] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0186 + Deleted old checkpoint: checkpoint_epoch_0183 +[MEM @ epoch 186 end] RAM: 17.6/188.4 GiB (9.3%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 187/499 (37% done) --- + [Epoch 187][10/40] loss=0.827449 avg=0.858170 VRAM=38.9GiB | 37.5% done | ETA(epoch): 691s + [Epoch 187][20/40] loss=0.884509 avg=0.867822 VRAM=38.8GiB | 37.5% done | ETA(epoch): 461s + [Epoch 187][30/40] loss=0.864898 avg=0.862315 VRAM=38.9GiB | 37.5% done | ETA(epoch): 230s + [Epoch 187][40/40] loss=0.847651 avg=0.857714 VRAM=38.8GiB | 37.6% done | ETA(epoch): 0s + Train loss: 0.857714 (921.1s) ETA: 5213min + Val loss: 0.830136 [t_0.0-0.2=1.0763 t_0.2-0.4=0.9729 t_0.4-0.6=0.8520 t_0.6-0.8=0.6931 t_0.8-1.0=0.6402] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0187 + Deleted old checkpoint: checkpoint_epoch_0184 +[MEM @ epoch 187 end] RAM: 17.6/188.4 GiB (9.3%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 188/499 (38% done) --- + [Epoch 188][10/40] loss=0.749115 avg=0.835009 VRAM=38.9GiB | 37.6% done | ETA(epoch): 691s + [Epoch 188][20/40] loss=0.945417 avg=0.868548 VRAM=38.8GiB | 37.7% done | ETA(epoch): 460s + [Epoch 188][30/40] loss=0.912768 avg=0.867186 VRAM=38.9GiB | 37.8% done | ETA(epoch): 230s + [Epoch 188][40/40] loss=0.895901 avg=0.864091 VRAM=38.8GiB | 37.8% done | ETA(epoch): 0s + Train loss: 0.864091 (920.5s) ETA: 5196min + Val loss: 0.873524 [t_0.0-0.2=1.0612 t_0.2-0.4=0.9900 t_0.4-0.6=0.8670 t_0.6-0.8=0.7085 t_0.8-1.0=0.6520] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0188 + Deleted old checkpoint: checkpoint_epoch_0185 +[MEM @ epoch 188 end] RAM: 17.6/188.4 GiB (9.3%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 189/499 (38% done) --- + [Epoch 189][10/40] loss=0.888015 avg=0.848995 VRAM=38.9GiB | 37.9% done | ETA(epoch): 690s + [Epoch 189][20/40] loss=0.860280 avg=0.855322 VRAM=38.8GiB | 37.9% done | ETA(epoch): 460s + [Epoch 189][30/40] loss=0.864583 avg=0.853834 VRAM=38.9GiB | 38.0% done | ETA(epoch): 230s + [Epoch 189][40/40] loss=0.866891 avg=0.851338 VRAM=38.8GiB | 38.0% done | ETA(epoch): 0s + Train loss: 0.851338 (919.9s) ETA: 5179min + Val loss: 0.860699 [t_0.0-0.2=1.0756 t_0.2-0.4=1.0033 t_0.4-0.6=0.8225 t_0.6-0.8=0.7322 t_0.8-1.0=0.6564] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0189 + Deleted old checkpoint: checkpoint_epoch_0186 +[MEM @ epoch 189 end] RAM: 17.6/188.4 GiB (9.3%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 190/499 (38% done) --- + [Epoch 190][10/40] loss=0.775542 avg=0.844951 VRAM=38.9GiB | 38.0% done | ETA(epoch): 690s + [Epoch 190][20/40] loss=0.842966 avg=0.853080 VRAM=38.8GiB | 38.1% done | ETA(epoch): 460s + [Epoch 190][30/40] loss=0.942167 avg=0.862850 VRAM=38.9GiB | 38.1% done | ETA(epoch): 230s + [Epoch 190][40/40] loss=0.964652 avg=0.863245 VRAM=38.8GiB | 38.2% done | ETA(epoch): 0s + Train loss: 0.863245 (920.2s) ETA: 5162min + Val loss: 0.855242 [t_0.0-0.2=1.0880 t_0.2-0.4=0.9788 t_0.4-0.6=0.8563 t_0.6-0.8=0.7342 t_0.8-1.0=0.6442] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0190 + Deleted old checkpoint: checkpoint_epoch_0187 +[MEM @ epoch 190 end] RAM: 17.5/188.4 GiB (9.3%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 191/499 (38% done) --- + [Epoch 191][10/40] loss=0.883351 avg=0.834945 VRAM=38.9GiB | 38.2% done | ETA(epoch): 690s + [Epoch 191][20/40] loss=0.920879 avg=0.861316 VRAM=38.8GiB | 38.3% done | ETA(epoch): 460s + [Epoch 191][30/40] loss=0.822054 avg=0.850552 VRAM=38.9GiB | 38.4% done | ETA(epoch): 230s + [Epoch 191][40/40] loss=0.779411 avg=0.850992 VRAM=38.8GiB | 38.4% done | ETA(epoch): 0s + Train loss: 0.850992 (920.6s) ETA: 5144min + Val loss: 0.821108 [t_0.0-0.2=1.0599 t_0.2-0.4=0.9781 t_0.4-0.6=0.8264 t_0.6-0.8=0.7269 t_0.8-1.0=0.6367] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0191 (BEST) + Deleted old checkpoint: checkpoint_epoch_0140 + Deleted old checkpoint: checkpoint_epoch_0188 +[MEM @ epoch 191 end] RAM: 17.6/188.4 GiB (9.4%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 192/499 (38% done) --- + [Epoch 192][10/40] loss=0.830854 avg=0.834767 VRAM=38.9GiB | 38.5% done | ETA(epoch): 690s + [Epoch 192][20/40] loss=0.823346 avg=0.837980 VRAM=38.8GiB | 38.5% done | ETA(epoch): 460s + [Epoch 192][30/40] loss=0.864652 avg=0.843623 VRAM=38.9GiB | 38.6% done | ETA(epoch): 230s + [Epoch 192][40/40] loss=0.750442 avg=0.848561 VRAM=38.8GiB | 38.6% done | ETA(epoch): 0s + Train loss: 0.848561 (920.0s) ETA: 5127min + Val loss: 0.858119 [t_0.0-0.2=1.0729 t_0.2-0.4=0.9829 t_0.4-0.6=0.8391 t_0.6-0.8=0.7113 t_0.8-1.0=0.6280] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0192 + Deleted old checkpoint: checkpoint_epoch_0189 +[MEM @ epoch 192 end] RAM: 17.5/188.4 GiB (9.3%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 193/499 (39% done) --- + [Epoch 193][10/40] loss=0.853323 avg=0.837173 VRAM=38.9GiB | 38.6% done | ETA(epoch): 690s + [Epoch 193][20/40] loss=0.907278 avg=0.851605 VRAM=38.8GiB | 38.7% done | ETA(epoch): 460s + [Epoch 193][30/40] loss=0.809286 avg=0.849386 VRAM=38.9GiB | 38.8% done | ETA(epoch): 230s + [Epoch 193][40/40] loss=0.869850 avg=0.849847 VRAM=38.8GiB | 38.8% done | ETA(epoch): 0s + Train loss: 0.849847 (920.1s) ETA: 5110min + Val loss: 0.874957 [t_0.0-0.2=1.0653 t_0.2-0.4=0.9892 t_0.4-0.6=0.8574 t_0.6-0.8=0.7064 t_0.8-1.0=0.6230] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0193 + Deleted old checkpoint: checkpoint_epoch_0190 +[MEM @ epoch 193 end] RAM: 17.5/188.4 GiB (9.3%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 194/499 (39% done) --- + [Epoch 194][10/40] loss=0.823216 avg=0.847439 VRAM=38.9GiB | 38.9% done | ETA(epoch): 689s + [Epoch 194][20/40] loss=0.768549 avg=0.846350 VRAM=38.8GiB | 38.9% done | ETA(epoch): 460s + [Epoch 194][30/40] loss=0.889940 avg=0.855449 VRAM=38.9GiB | 39.0% done | ETA(epoch): 230s + [Epoch 194][40/40] loss=0.843618 avg=0.857665 VRAM=38.8GiB | 39.0% done | ETA(epoch): 0s + Train loss: 0.857665 (919.9s) ETA: 5093min + Val loss: 0.854167 [t_0.0-0.2=1.0640 t_0.2-0.4=1.0050 t_0.4-0.6=0.8682 t_0.6-0.8=0.6967 t_0.8-1.0=0.6391] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0194 +[MEM @ epoch 194 end] RAM: 17.5/188.4 GiB (9.3%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 195/499 (39% done) --- + [Epoch 195][10/40] loss=0.771987 avg=0.848168 VRAM=38.9GiB | 39.1% done | ETA(epoch): 691s + [Epoch 195][20/40] loss=0.893339 avg=0.854785 VRAM=38.8GiB | 39.1% done | ETA(epoch): 461s + [Epoch 195][30/40] loss=0.797750 avg=0.850583 VRAM=38.9GiB | 39.1% done | ETA(epoch): 230s + [Epoch 195][40/40] loss=0.857862 avg=0.851251 VRAM=38.8GiB | 39.2% done | ETA(epoch): 0s + Train loss: 0.851251 (921.9s) ETA: 5076min + Val loss: 0.882667 [t_0.0-0.2=1.0955 t_0.2-0.4=1.0105 t_0.4-0.6=0.7874 t_0.6-0.8=0.7013 t_0.8-1.0=0.6425] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0195 + Deleted old checkpoint: checkpoint_epoch_0192 +[MEM @ epoch 195 end] RAM: 17.7/188.4 GiB (9.4%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 196/499 (39% done) --- + [Epoch 196][10/40] loss=0.870860 avg=0.833472 VRAM=38.9GiB | 39.2% done | ETA(epoch): 690s + [Epoch 196][20/40] loss=0.867564 avg=0.858425 VRAM=38.8GiB | 39.3% done | ETA(epoch): 460s + [Epoch 196][30/40] loss=0.836890 avg=0.855374 VRAM=38.9GiB | 39.4% done | ETA(epoch): 230s + [Epoch 196][40/40] loss=0.807734 avg=0.846356 VRAM=38.8GiB | 39.4% done | ETA(epoch): 0s + Train loss: 0.846356 (920.1s) ETA: 5059min + Val loss: 0.854661 [t_0.0-0.2=1.0548 t_0.2-0.4=0.9795 t_0.4-0.6=0.8212 t_0.6-0.8=0.7144 t_0.8-1.0=0.6421] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0196 + Deleted old checkpoint: checkpoint_epoch_0193 +[MEM @ epoch 196 end] RAM: 17.6/188.4 GiB (9.3%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 197/499 (39% done) --- + [Epoch 197][10/40] loss=0.960706 avg=0.886461 VRAM=38.9GiB | 39.5% done | ETA(epoch): 691s + [Epoch 197][20/40] loss=0.958695 avg=0.883210 VRAM=38.8GiB | 39.5% done | ETA(epoch): 461s + [Epoch 197][30/40] loss=0.792615 avg=0.877353 VRAM=38.9GiB | 39.6% done | ETA(epoch): 230s + [Epoch 197][40/40] loss=0.810930 avg=0.869720 VRAM=38.8GiB | 39.6% done | ETA(epoch): 0s + Train loss: 0.869720 (920.8s) ETA: 5042min + Val loss: 0.866644 [t_0.0-0.2=1.0709 t_0.2-0.4=1.0247 t_0.4-0.6=0.8535 t_0.6-0.8=0.6910 t_0.8-1.0=0.6449] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0197 + Deleted old checkpoint: checkpoint_epoch_0194 +[MEM @ epoch 197 end] RAM: 17.6/188.4 GiB (9.3%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 198/499 (40% done) --- + [Epoch 198][10/40] loss=0.815904 avg=0.858310 VRAM=38.9GiB | 39.6% done | ETA(epoch): 690s + [Epoch 198][20/40] loss=0.828493 avg=0.876133 VRAM=38.8GiB | 39.7% done | ETA(epoch): 460s + [Epoch 198][30/40] loss=0.906598 avg=0.876131 VRAM=38.9GiB | 39.8% done | ETA(epoch): 230s + [Epoch 198][40/40] loss=0.784999 avg=0.870437 VRAM=38.8GiB | 39.8% done | ETA(epoch): 0s + Train loss: 0.870437 (919.9s) ETA: 5025min + Val loss: 0.868978 [t_0.0-0.2=1.0759 t_0.2-0.4=1.0122 t_0.4-0.6=0.8483 t_0.6-0.8=0.7161 t_0.8-1.0=0.6211] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0198 + Deleted old checkpoint: checkpoint_epoch_0195 +[MEM @ epoch 198 end] RAM: 17.6/188.4 GiB (9.3%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 199/499 (40% done) --- + [Epoch 199][10/40] loss=0.843676 avg=0.866187 VRAM=38.9GiB | 39.9% done | ETA(epoch): 691s + [Epoch 199][20/40] loss=0.924862 avg=0.866845 VRAM=38.8GiB | 39.9% done | ETA(epoch): 461s + [Epoch 199][30/40] loss=0.865468 avg=0.869969 VRAM=38.9GiB | 40.0% done | ETA(epoch): 230s + [Epoch 199][40/40] loss=0.781296 avg=0.864174 VRAM=38.8GiB | 40.0% done | ETA(epoch): 0s + Train loss: 0.864174 (921.7s) ETA: 5008min + Val loss: 0.857214 [t_0.0-0.2=1.0795 t_0.2-0.4=0.9868 t_0.4-0.6=0.8112 t_0.6-0.8=0.6992 t_0.8-1.0=0.6368] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0199 + Deleted old checkpoint: checkpoint_epoch_0196 +[MEM @ epoch 199 end] RAM: 17.6/188.4 GiB (9.3%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 200/499 (40% done) --- + [MilestoneVis] train_0 step 8000 ✓ + [MilestoneGrid] train_0 → output/lora_rank_128_mlp_H200/milestone_vis/milestone_grid_train_0.png + [MilestoneVis] train_1 step 8000 ✓ + [MilestoneGrid] train_1 → output/lora_rank_128_mlp_H200/milestone_vis/milestone_grid_train_1.png + [MilestoneVis] val_0 step 8000 ✓ + [MilestoneGrid] val_0 → output/lora_rank_128_mlp_H200/milestone_vis/milestone_grid_val_0.png + [MilestoneVis] val_1 step 8000 ✓ + [MilestoneGrid] val_1 → output/lora_rank_128_mlp_H200/milestone_vis/milestone_grid_val_1.png + [MilestoneVis] test_0 step 8000 ✓ + [MilestoneGrid] test_0 → output/lora_rank_128_mlp_H200/milestone_vis/milestone_grid_test_0.png + [MilestoneVis] test_1 step 8000 ✓ + [MilestoneGrid] test_1 → output/lora_rank_128_mlp_H200/milestone_vis/milestone_grid_test_1.png + [MilestoneVis] test_2 step 8000 ✓ + [MilestoneGrid] test_2 → output/lora_rank_128_mlp_H200/milestone_vis/milestone_grid_test_2.png + [MilestoneVis] test_3 step 8000 ✓ + [MilestoneGrid] test_3 → output/lora_rank_128_mlp_H200/milestone_vis/milestone_grid_test_3.png + [MilestoneVis] test_4 step 8000 ✓ + [MilestoneGrid] test_4 → output/lora_rank_128_mlp_H200/milestone_vis/milestone_grid_test_4.png + [MilestoneVis] test_5 step 8000 ✓ + [MilestoneGrid] test_5 → output/lora_rank_128_mlp_H200/milestone_vis/milestone_grid_test_5.png + [Epoch 200][10/40] loss=0.866837 avg=0.852931 VRAM=38.9GiB | 40.1% done | ETA(epoch): 2940s + [Epoch 200][20/40] loss=0.956229 avg=0.863915 VRAM=38.8GiB | 40.1% done | ETA(epoch): 1210s + [Epoch 200][30/40] loss=0.952178 avg=0.872942 VRAM=38.9GiB | 40.2% done | ETA(epoch): 480s + [Epoch 200][40/40] loss=0.841754 avg=0.866454 VRAM=38.8GiB | 40.2% done | ETA(epoch): 0s + Train loss: 0.866454 (1669.8s) ETA: 5009min + Val loss: 0.821188 [t_0.0-0.2=1.0702 t_0.2-0.4=1.0007 t_0.4-0.6=0.8070 t_0.6-0.8=0.7194 t_0.8-1.0=0.6389] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0200 + Deleted old checkpoint: checkpoint_epoch_0197 +[MEM @ epoch 200 end] RAM: 18.1/188.4 GiB (9.6%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 201/499 (40% done) --- + [Epoch 201][10/40] loss=0.945536 avg=0.868058 VRAM=38.9GiB | 40.2% done | ETA(epoch): 690s + [Epoch 201][20/40] loss=0.905835 avg=0.859287 VRAM=38.8GiB | 40.3% done | ETA(epoch): 460s + [Epoch 201][30/40] loss=0.877349 avg=0.849854 VRAM=38.9GiB | 40.4% done | ETA(epoch): 230s + [Epoch 201][40/40] loss=0.744889 avg=0.837689 VRAM=38.8GiB | 40.4% done | ETA(epoch): 0s + Train loss: 0.837689 (920.2s) ETA: 4992min + Val loss: 0.842651 [t_0.0-0.2=1.0733 t_0.2-0.4=1.0328 t_0.4-0.6=0.8475 t_0.6-0.8=0.7002 t_0.8-1.0=0.6412] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0201 + Deleted old checkpoint: checkpoint_epoch_0198 +[MEM @ epoch 201 end] RAM: 18.1/188.4 GiB (9.6%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 202/499 (40% done) --- + [Epoch 202][10/40] loss=0.987441 avg=0.847864 VRAM=38.9GiB | 40.5% done | ETA(epoch): 689s + [Epoch 202][20/40] loss=0.782830 avg=0.861082 VRAM=38.8GiB | 40.5% done | ETA(epoch): 460s + [Epoch 202][30/40] loss=0.866747 avg=0.865274 VRAM=38.9GiB | 40.6% done | ETA(epoch): 230s + [Epoch 202][40/40] loss=0.797802 avg=0.855328 VRAM=38.8GiB | 40.6% done | ETA(epoch): 0s + Train loss: 0.855328 (919.4s) ETA: 4975min + Val loss: 0.853591 [t_0.0-0.2=1.0684 t_0.2-0.4=0.9993 t_0.4-0.6=0.8452 t_0.6-0.8=0.7207 t_0.8-1.0=0.6386] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0202 + Deleted old checkpoint: checkpoint_epoch_0199 +[MEM @ epoch 202 end] RAM: 18.2/188.4 GiB (9.6%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 203/499 (41% done) --- + [Epoch 203][10/40] loss=0.849405 avg=0.857569 VRAM=38.9GiB | 40.6% done | ETA(epoch): 690s + [Epoch 203][20/40] loss=0.773452 avg=0.849628 VRAM=38.8GiB | 40.7% done | ETA(epoch): 460s + [Epoch 203][30/40] loss=0.787112 avg=0.848630 VRAM=38.9GiB | 40.8% done | ETA(epoch): 230s + [Epoch 203][40/40] loss=0.825794 avg=0.855544 VRAM=38.8GiB | 40.8% done | ETA(epoch): 0s + Train loss: 0.855544 (920.0s) ETA: 4958min + Val loss: 0.855729 [t_0.0-0.2=1.0861 t_0.2-0.4=1.0119 t_0.4-0.6=0.8550 t_0.6-0.8=0.7114 t_0.8-1.0=0.6198] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0203 + Deleted old checkpoint: checkpoint_epoch_0200 +[MEM @ epoch 203 end] RAM: 18.1/188.4 GiB (9.6%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 204/499 (41% done) --- + [Epoch 204][10/40] loss=0.850915 avg=0.837770 VRAM=38.9GiB | 40.8% done | ETA(epoch): 689s + [Epoch 204][20/40] loss=0.910465 avg=0.844967 VRAM=38.8GiB | 40.9% done | ETA(epoch): 459s + [Epoch 204][30/40] loss=0.792274 avg=0.849381 VRAM=38.9GiB | 40.9% done | ETA(epoch): 230s + [Epoch 204][40/40] loss=0.883668 avg=0.843841 VRAM=38.8GiB | 41.0% done | ETA(epoch): 0s + Train loss: 0.843841 (918.8s) ETA: 4940min + Val loss: 0.877909 [t_0.0-0.2=1.0814 t_0.2-0.4=1.0219 t_0.4-0.6=0.8375 t_0.6-0.8=0.6951 t_0.8-1.0=0.6372] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0204 + Deleted old checkpoint: checkpoint_epoch_0201 +[MEM @ epoch 204 end] RAM: 18.2/188.4 GiB (9.7%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 205/499 (41% done) --- + [Epoch 205][10/40] loss=0.861978 avg=0.884907 VRAM=38.9GiB | 41.0% done | ETA(epoch): 689s + [Epoch 205][20/40] loss=0.794842 avg=0.857869 VRAM=38.8GiB | 41.1% done | ETA(epoch): 460s + [Epoch 205][30/40] loss=0.932633 avg=0.860038 VRAM=38.9GiB | 41.1% done | ETA(epoch): 230s + [Epoch 205][40/40] loss=0.863412 avg=0.858613 VRAM=38.8GiB | 41.2% done | ETA(epoch): 0s + Train loss: 0.858613 (919.3s) ETA: 4923min + Val loss: 0.849562 [t_0.0-0.2=1.0707 t_0.2-0.4=0.9971 t_0.4-0.6=0.8353 t_0.6-0.8=0.7377 t_0.8-1.0=0.6295] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0205 + Deleted old checkpoint: checkpoint_epoch_0202 +[MEM @ epoch 205 end] RAM: 18.2/188.4 GiB (9.6%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 206/499 (41% done) --- + [Epoch 206][10/40] loss=0.899106 avg=0.857605 VRAM=38.9GiB | 41.2% done | ETA(epoch): 689s + [Epoch 206][20/40] loss=0.838990 avg=0.853784 VRAM=38.8GiB | 41.3% done | ETA(epoch): 460s + [Epoch 206][30/40] loss=0.870295 avg=0.865317 VRAM=38.9GiB | 41.3% done | ETA(epoch): 230s + [Epoch 206][40/40] loss=0.851236 avg=0.860667 VRAM=38.8GiB | 41.4% done | ETA(epoch): 0s + Train loss: 0.860667 (919.8s) ETA: 4906min + Val loss: 0.820941 [t_0.0-0.2=1.0709 t_0.2-0.4=0.9553 t_0.4-0.6=0.8687 t_0.6-0.8=0.6957 t_0.8-1.0=0.6256] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0206 (BEST) + Deleted old checkpoint: checkpoint_epoch_0191 + Deleted old checkpoint: checkpoint_epoch_0203 +[MEM @ epoch 206 end] RAM: 18.1/188.4 GiB (9.6%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 207/499 (41% done) --- + [Epoch 207][10/40] loss=0.894665 avg=0.856407 VRAM=38.9GiB | 41.4% done | ETA(epoch): 689s + [Epoch 207][20/40] loss=0.795412 avg=0.818574 VRAM=38.8GiB | 41.5% done | ETA(epoch): 460s + [Epoch 207][30/40] loss=0.782569 avg=0.829458 VRAM=38.9GiB | 41.5% done | ETA(epoch): 230s + [Epoch 207][40/40] loss=0.811238 avg=0.839221 VRAM=38.8GiB | 41.6% done | ETA(epoch): 0s + Train loss: 0.839221 (919.3s) ETA: 4889min + Val loss: 0.881579 [t_0.0-0.2=1.0690 t_0.2-0.4=0.9855 t_0.4-0.6=0.8524 t_0.6-0.8=0.7189 t_0.8-1.0=0.6449] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0207 + Deleted old checkpoint: checkpoint_epoch_0204 +[MEM @ epoch 207 end] RAM: 18.1/188.4 GiB (9.6%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 208/499 (42% done) --- + [Epoch 208][10/40] loss=0.860041 avg=0.871642 VRAM=38.9GiB | 41.6% done | ETA(epoch): 689s + [Epoch 208][20/40] loss=0.782443 avg=0.866707 VRAM=38.8GiB | 41.7% done | ETA(epoch): 459s + [Epoch 208][30/40] loss=0.846111 avg=0.852007 VRAM=38.9GiB | 41.8% done | ETA(epoch): 230s + [Epoch 208][40/40] loss=0.812592 avg=0.848699 VRAM=38.8GiB | 41.8% done | ETA(epoch): 0s + Train loss: 0.848699 (919.1s) ETA: 4871min + Val loss: 0.857238 [t_0.0-0.2=1.0666 t_0.2-0.4=0.9895 t_0.4-0.6=0.8525 t_0.6-0.8=0.7194 t_0.8-1.0=0.6511] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0208 + Deleted old checkpoint: checkpoint_epoch_0205 +[MEM @ epoch 208 end] RAM: 18.2/188.4 GiB (9.7%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 209/499 (42% done) --- + [Epoch 209][10/40] loss=0.773140 avg=0.845708 VRAM=38.9GiB | 41.9% done | ETA(epoch): 690s + [Epoch 209][20/40] loss=0.855734 avg=0.866345 VRAM=38.8GiB | 41.9% done | ETA(epoch): 460s + [Epoch 209][30/40] loss=0.770506 avg=0.857728 VRAM=38.9GiB | 41.9% done | ETA(epoch): 230s + [Epoch 209][40/40] loss=0.827647 avg=0.858694 VRAM=38.8GiB | 42.0% done | ETA(epoch): 0s + Train loss: 0.858694 (919.6s) ETA: 4854min + Val loss: 0.850658 [t_0.0-0.2=1.0801 t_0.2-0.4=1.0066 t_0.4-0.6=0.8340 t_0.6-0.8=0.7421 t_0.8-1.0=0.6405] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0209 +[MEM @ epoch 209 end] RAM: 18.2/188.4 GiB (9.7%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 210/499 (42% done) --- + [Epoch 210][10/40] loss=0.820607 avg=0.850331 VRAM=38.9GiB | 42.0% done | ETA(epoch): 690s + [Epoch 210][20/40] loss=0.942782 avg=0.851885 VRAM=38.8GiB | 42.1% done | ETA(epoch): 460s + [Epoch 210][30/40] loss=0.873716 avg=0.848512 VRAM=38.9GiB | 42.1% done | ETA(epoch): 230s + [Epoch 210][40/40] loss=0.749308 avg=0.851775 VRAM=38.8GiB | 42.2% done | ETA(epoch): 0s + Train loss: 0.851775 (920.7s) ETA: 4837min + Val loss: 0.846200 [t_0.0-0.2=1.0692 t_0.2-0.4=0.9991 t_0.4-0.6=0.8254 t_0.6-0.8=0.7122 t_0.8-1.0=0.6180] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0210 + Deleted old checkpoint: checkpoint_epoch_0207 +[MEM @ epoch 210 end] RAM: 18.3/188.4 GiB (9.7%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 211/499 (42% done) --- + [Epoch 211][10/40] loss=0.870797 avg=0.842201 VRAM=38.9GiB | 42.2% done | ETA(epoch): 690s + [Epoch 211][20/40] loss=0.842578 avg=0.838460 VRAM=38.8GiB | 42.3% done | ETA(epoch): 460s + [Epoch 211][30/40] loss=0.858226 avg=0.846751 VRAM=38.9GiB | 42.4% done | ETA(epoch): 230s + [Epoch 211][40/40] loss=0.825791 avg=0.844498 VRAM=38.8GiB | 42.4% done | ETA(epoch): 0s + Train loss: 0.844498 (920.0s) ETA: 4820min + Val loss: 0.865569 [t_0.0-0.2=1.0556 t_0.2-0.4=1.0025 t_0.4-0.6=0.8338 t_0.6-0.8=0.7212 t_0.8-1.0=0.6381] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0211 + Deleted old checkpoint: checkpoint_epoch_0208 +[MEM @ epoch 211 end] RAM: 18.1/188.4 GiB (9.6%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 212/499 (42% done) --- + [Epoch 212][10/40] loss=0.823932 avg=0.852855 VRAM=38.9GiB | 42.4% done | ETA(epoch): 690s + [Epoch 212][20/40] loss=0.892756 avg=0.865603 VRAM=38.8GiB | 42.5% done | ETA(epoch): 460s + [Epoch 212][30/40] loss=0.730557 avg=0.859396 VRAM=38.9GiB | 42.5% done | ETA(epoch): 230s + [Epoch 212][40/40] loss=0.836043 avg=0.859975 VRAM=38.8GiB | 42.6% done | ETA(epoch): 0s + Train loss: 0.859975 (920.2s) ETA: 4803min + Val loss: 0.849765 [t_0.0-0.2=1.0775 t_0.2-0.4=0.9972 t_0.4-0.6=0.8165 t_0.6-0.8=0.7257 t_0.8-1.0=0.6356] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0212 + Deleted old checkpoint: checkpoint_epoch_0209 +[MEM @ epoch 212 end] RAM: 18.2/188.4 GiB (9.7%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 213/499 (43% done) --- + [Epoch 213][10/40] loss=0.879285 avg=0.855405 VRAM=38.9GiB | 42.6% done | ETA(epoch): 690s + [Epoch 213][20/40] loss=0.949453 avg=0.852369 VRAM=38.8GiB | 42.7% done | ETA(epoch): 460s + [Epoch 213][30/40] loss=0.918827 avg=0.853650 VRAM=38.9GiB | 42.8% done | ETA(epoch): 230s + [Epoch 213][40/40] loss=0.904456 avg=0.850096 VRAM=38.8GiB | 42.8% done | ETA(epoch): 0s + Train loss: 0.850096 (920.5s) ETA: 4786min + Val loss: 0.864078 [t_0.0-0.2=1.0853 t_0.2-0.4=1.0142 t_0.4-0.6=0.8538 t_0.6-0.8=0.6996 t_0.8-1.0=0.6275] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0213 + Deleted old checkpoint: checkpoint_epoch_0210 +[MEM @ epoch 213 end] RAM: 18.2/188.4 GiB (9.6%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 214/499 (43% done) --- + [Epoch 214][10/40] loss=0.918913 avg=0.863057 VRAM=38.9GiB | 42.9% done | ETA(epoch): 691s + [Epoch 214][20/40] loss=0.774792 avg=0.851291 VRAM=38.8GiB | 42.9% done | ETA(epoch): 461s + [Epoch 214][30/40] loss=0.815185 avg=0.845695 VRAM=38.9GiB | 43.0% done | ETA(epoch): 230s + [Epoch 214][40/40] loss=0.816832 avg=0.843745 VRAM=38.8GiB | 43.0% done | ETA(epoch): 0s + Train loss: 0.843745 (921.8s) ETA: 4769min + Val loss: 0.838506 [t_0.0-0.2=1.0601 t_0.2-0.4=0.9991 t_0.4-0.6=0.8436 t_0.6-0.8=0.7109 t_0.8-1.0=0.6283] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0214 + Deleted old checkpoint: checkpoint_epoch_0211 +[MEM @ epoch 214 end] RAM: 18.2/188.4 GiB (9.7%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 215/499 (43% done) --- + [Epoch 215][10/40] loss=0.814419 avg=0.856870 VRAM=38.9GiB | 43.0% done | ETA(epoch): 691s + [Epoch 215][20/40] loss=0.970744 avg=0.851363 VRAM=38.8GiB | 43.1% done | ETA(epoch): 460s + [Epoch 215][30/40] loss=0.755641 avg=0.851611 VRAM=38.9GiB | 43.1% done | ETA(epoch): 230s + [Epoch 215][40/40] loss=0.948110 avg=0.854048 VRAM=38.8GiB | 43.2% done | ETA(epoch): 0s + Train loss: 0.854048 (921.0s) ETA: 4752min + Val loss: 0.826595 [t_0.0-0.2=1.0880 t_0.2-0.4=1.0030 t_0.4-0.6=0.8627 t_0.6-0.8=0.7203 t_0.8-1.0=0.6380] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0215 + Deleted old checkpoint: checkpoint_epoch_0212 +[MEM @ epoch 215 end] RAM: 18.3/188.4 GiB (9.7%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 216/499 (43% done) --- + [Epoch 216][10/40] loss=0.914684 avg=0.851996 VRAM=38.9GiB | 43.2% done | ETA(epoch): 691s + [Epoch 216][20/40] loss=0.830406 avg=0.843287 VRAM=38.8GiB | 43.3% done | ETA(epoch): 461s + [Epoch 216][30/40] loss=0.858122 avg=0.849796 VRAM=38.9GiB | 43.4% done | ETA(epoch): 230s + [Epoch 216][40/40] loss=0.904959 avg=0.857091 VRAM=38.8GiB | 43.4% done | ETA(epoch): 0s + Train loss: 0.857091 (920.9s) ETA: 4734min + Val loss: 0.845554 [t_0.0-0.2=1.0586 t_0.2-0.4=1.0272 t_0.4-0.6=0.8358 t_0.6-0.8=0.7336 t_0.8-1.0=0.6452] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0216 + Deleted old checkpoint: checkpoint_epoch_0213 +[MEM @ epoch 216 end] RAM: 18.1/188.4 GiB (9.6%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 217/499 (43% done) --- + [Epoch 217][10/40] loss=0.917707 avg=0.874133 VRAM=38.9GiB | 43.5% done | ETA(epoch): 690s + [Epoch 217][20/40] loss=0.954788 avg=0.889674 VRAM=38.8GiB | 43.5% done | ETA(epoch): 460s + [Epoch 217][30/40] loss=0.868208 avg=0.873165 VRAM=38.9GiB | 43.5% done | ETA(epoch): 230s + [Epoch 217][40/40] loss=0.933801 avg=0.875708 VRAM=38.8GiB | 43.6% done | ETA(epoch): 0s + Train loss: 0.875708 (920.0s) ETA: 4717min + Val loss: 0.849193 [t_0.0-0.2=1.0580 t_0.2-0.4=1.0108 t_0.4-0.6=0.8596 t_0.6-0.8=0.7320 t_0.8-1.0=0.6484] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0217 + Deleted old checkpoint: checkpoint_epoch_0214 +[MEM @ epoch 217 end] RAM: 18.1/188.4 GiB (9.6%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 218/499 (44% done) --- + [Epoch 218][10/40] loss=0.737026 avg=0.828096 VRAM=38.9GiB | 43.6% done | ETA(epoch): 691s + [Epoch 218][20/40] loss=0.817435 avg=0.850352 VRAM=38.8GiB | 43.7% done | ETA(epoch): 461s + [Epoch 218][30/40] loss=0.770985 avg=0.855571 VRAM=38.9GiB | 43.8% done | ETA(epoch): 230s + [Epoch 218][40/40] loss=0.746214 avg=0.862154 VRAM=38.8GiB | 43.8% done | ETA(epoch): 0s + Train loss: 0.862154 (921.2s) ETA: 4700min + Val loss: 0.870332 [t_0.0-0.2=1.0779 t_0.2-0.4=1.0121 t_0.4-0.6=0.8377 t_0.6-0.8=0.6944 t_0.8-1.0=0.6384] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0218 + Deleted old checkpoint: checkpoint_epoch_0215 +[MEM @ epoch 218 end] RAM: 18.2/188.4 GiB (9.7%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 219/499 (44% done) --- + [Epoch 219][10/40] loss=0.811257 avg=0.830094 VRAM=38.9GiB | 43.9% done | ETA(epoch): 690s + [Epoch 219][20/40] loss=0.885302 avg=0.839548 VRAM=38.8GiB | 43.9% done | ETA(epoch): 460s + [Epoch 219][30/40] loss=0.814045 avg=0.850884 VRAM=38.9GiB | 44.0% done | ETA(epoch): 230s + [Epoch 219][40/40] loss=0.802168 avg=0.847316 VRAM=38.8GiB | 44.0% done | ETA(epoch): 0s + Train loss: 0.847316 (920.3s) ETA: 4683min + Val loss: 0.835921 [t_0.0-0.2=1.0639 t_0.2-0.4=1.0060 t_0.4-0.6=0.8433 t_0.6-0.8=0.7173 t_0.8-1.0=0.6355] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0219 + Deleted old checkpoint: checkpoint_epoch_0216 +[MEM @ epoch 219 end] RAM: 18.2/188.4 GiB (9.7%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 220/499 (44% done) --- + [Epoch 220][10/40] loss=0.864237 avg=0.854713 VRAM=38.9GiB | 44.0% done | ETA(epoch): 691s + [Epoch 220][20/40] loss=0.812975 avg=0.839866 VRAM=38.8GiB | 44.1% done | ETA(epoch): 460s + [Epoch 220][30/40] loss=0.826887 avg=0.849629 VRAM=38.9GiB | 44.1% done | ETA(epoch): 230s + [Epoch 220][40/40] loss=0.832291 avg=0.849371 VRAM=38.8GiB | 44.2% done | ETA(epoch): 0s + Train loss: 0.849371 (920.2s) ETA: 4666min + Val loss: 0.839337 [t_0.0-0.2=1.0537 t_0.2-0.4=1.0236 t_0.4-0.6=0.8186 t_0.6-0.8=0.6888 t_0.8-1.0=0.6429] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0220 + Deleted old checkpoint: checkpoint_epoch_0217 +[MEM @ epoch 220 end] RAM: 18.2/188.4 GiB (9.7%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 221/499 (44% done) --- + [Epoch 221][10/40] loss=0.915046 avg=0.862840 VRAM=38.9GiB | 44.2% done | ETA(epoch): 690s + [Epoch 221][20/40] loss=0.827668 avg=0.857488 VRAM=38.8GiB | 44.3% done | ETA(epoch): 460s + [Epoch 221][30/40] loss=0.971696 avg=0.857424 VRAM=38.9GiB | 44.4% done | ETA(epoch): 230s + [Epoch 221][40/40] loss=0.822372 avg=0.852559 VRAM=38.8GiB | 44.4% done | ETA(epoch): 0s + Train loss: 0.852559 (920.3s) ETA: 4649min + Val loss: 0.818011 [t_0.0-0.2=1.0631 t_0.2-0.4=1.0126 t_0.4-0.6=0.8875 t_0.6-0.8=0.7026 t_0.8-1.0=0.6361] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0221 (BEST) + Deleted old checkpoint: checkpoint_epoch_0206 + Deleted old checkpoint: checkpoint_epoch_0218 +[MEM @ epoch 221 end] RAM: 18.2/188.4 GiB (9.6%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 222/499 (44% done) --- + [Epoch 222][10/40] loss=0.874285 avg=0.874954 VRAM=38.9GiB | 44.5% done | ETA(epoch): 690s + [Epoch 222][20/40] loss=0.908423 avg=0.878107 VRAM=38.8GiB | 44.5% done | ETA(epoch): 460s + [Epoch 222][30/40] loss=0.952668 avg=0.868404 VRAM=38.9GiB | 44.5% done | ETA(epoch): 230s + [Epoch 222][40/40] loss=0.951408 avg=0.868346 VRAM=38.8GiB | 44.6% done | ETA(epoch): 0s + Train loss: 0.868346 (920.4s) ETA: 4632min + Val loss: 0.865279 [t_0.0-0.2=1.0684 t_0.2-0.4=1.0097 t_0.4-0.6=0.8222 t_0.6-0.8=0.6911 t_0.8-1.0=0.6430] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0222 + Deleted old checkpoint: checkpoint_epoch_0219 +[MEM @ epoch 222 end] RAM: 18.2/188.4 GiB (9.6%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 223/499 (45% done) --- + [Epoch 223][10/40] loss=0.836714 avg=0.864919 VRAM=38.9GiB | 44.6% done | ETA(epoch): 689s + [Epoch 223][20/40] loss=0.817381 avg=0.857450 VRAM=38.8GiB | 44.7% done | ETA(epoch): 460s + [Epoch 223][30/40] loss=0.888861 avg=0.864131 VRAM=38.9GiB | 44.8% done | ETA(epoch): 230s + [Epoch 223][40/40] loss=0.870476 avg=0.859523 VRAM=38.8GiB | 44.8% done | ETA(epoch): 0s + Train loss: 0.859523 (920.0s) ETA: 4615min + Val loss: 0.848428 [t_0.0-0.2=1.0552 t_0.2-0.4=1.0003 t_0.4-0.6=0.8562 t_0.6-0.8=0.7077 t_0.8-1.0=0.6335] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0223 + Deleted old checkpoint: checkpoint_epoch_0220 +[MEM @ epoch 223 end] RAM: 18.2/188.4 GiB (9.6%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 224/499 (45% done) --- + [Epoch 224][10/40] loss=0.793641 avg=0.854928 VRAM=38.9GiB | 44.9% done | ETA(epoch): 690s + [Epoch 224][20/40] loss=0.918641 avg=0.857368 VRAM=38.8GiB | 44.9% done | ETA(epoch): 460s + [Epoch 224][30/40] loss=0.913994 avg=0.864182 VRAM=38.9GiB | 45.0% done | ETA(epoch): 230s + [Epoch 224][40/40] loss=0.851368 avg=0.860972 VRAM=38.8GiB | 45.0% done | ETA(epoch): 0s + Train loss: 0.860972 (919.8s) ETA: 4598min + Val loss: 0.851926 [t_0.0-0.2=1.0765 t_0.2-0.4=0.9886 t_0.4-0.6=0.8756 t_0.6-0.8=0.7252 t_0.8-1.0=0.6435] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0224 +[MEM @ epoch 224 end] RAM: 18.3/188.4 GiB (9.7%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 225/499 (45% done) --- + [Epoch 225][10/40] loss=0.769588 avg=0.813283 VRAM=38.9GiB | 45.1% done | ETA(epoch): 691s + [Epoch 225][20/40] loss=0.868231 avg=0.833688 VRAM=38.8GiB | 45.1% done | ETA(epoch): 460s + [Epoch 225][30/40] loss=0.861405 avg=0.838802 VRAM=38.9GiB | 45.1% done | ETA(epoch): 230s + [Epoch 225][40/40] loss=0.836095 avg=0.839133 VRAM=38.8GiB | 45.2% done | ETA(epoch): 0s + Train loss: 0.839133 (920.7s) ETA: 4581min + Val loss: 0.859261 [t_0.0-0.2=1.0872 t_0.2-0.4=0.9795 t_0.4-0.6=0.8371 t_0.6-0.8=0.7248 t_0.8-1.0=0.6277] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0225 + Deleted old checkpoint: checkpoint_epoch_0222 +[MEM @ epoch 225 end] RAM: 18.3/188.4 GiB (9.7%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 226/499 (45% done) --- + [Epoch 226][10/40] loss=0.928235 avg=0.854900 VRAM=38.9GiB | 45.2% done | ETA(epoch): 690s + [Epoch 226][20/40] loss=0.945250 avg=0.862248 VRAM=38.8GiB | 45.3% done | ETA(epoch): 460s + [Epoch 226][30/40] loss=0.895555 avg=0.860689 VRAM=38.9GiB | 45.4% done | ETA(epoch): 230s + [Epoch 226][40/40] loss=0.853160 avg=0.857908 VRAM=38.8GiB | 45.4% done | ETA(epoch): 0s + Train loss: 0.857908 (919.9s) ETA: 4564min + Val loss: 0.877706 [t_0.0-0.2=1.0745 t_0.2-0.4=0.9893 t_0.4-0.6=0.8323 t_0.6-0.8=0.7318 t_0.8-1.0=0.6332] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0226 + Deleted old checkpoint: checkpoint_epoch_0223 +[MEM @ epoch 226 end] RAM: 18.2/188.4 GiB (9.6%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 227/499 (45% done) --- + [Epoch 227][10/40] loss=0.779409 avg=0.830919 VRAM=38.9GiB | 45.5% done | ETA(epoch): 690s + [Epoch 227][20/40] loss=0.873064 avg=0.856305 VRAM=38.8GiB | 45.5% done | ETA(epoch): 460s + [Epoch 227][30/40] loss=0.938921 avg=0.855754 VRAM=38.9GiB | 45.6% done | ETA(epoch): 230s + [Epoch 227][40/40] loss=0.796436 avg=0.856672 VRAM=38.8GiB | 45.6% done | ETA(epoch): 0s + Train loss: 0.856672 (919.8s) ETA: 4547min + Val loss: 0.823795 [t_0.0-0.2=1.0691 t_0.2-0.4=1.0290 t_0.4-0.6=0.8440 t_0.6-0.8=0.7039 t_0.8-1.0=0.6463] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0227 + Deleted old checkpoint: checkpoint_epoch_0224 +[MEM @ epoch 227 end] RAM: 18.2/188.4 GiB (9.6%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 228/499 (46% done) --- + [Epoch 228][10/40] loss=0.799186 avg=0.846438 VRAM=38.9GiB | 45.6% done | ETA(epoch): 690s + [Epoch 228][20/40] loss=0.843572 avg=0.847941 VRAM=38.8GiB | 45.7% done | ETA(epoch): 460s + [Epoch 228][30/40] loss=0.903933 avg=0.857601 VRAM=38.9GiB | 45.8% done | ETA(epoch): 230s + [Epoch 228][40/40] loss=0.804061 avg=0.857301 VRAM=38.8GiB | 45.8% done | ETA(epoch): 0s + Train loss: 0.857301 (919.6s) ETA: 4529min + Val loss: 0.865016 [t_0.0-0.2=1.0815 t_0.2-0.4=1.0018 t_0.4-0.6=0.8514 t_0.6-0.8=0.6950 t_0.8-1.0=0.6550] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0228 + Deleted old checkpoint: checkpoint_epoch_0225 +[MEM @ epoch 228 end] RAM: 18.2/188.4 GiB (9.7%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 229/499 (46% done) --- + [Epoch 229][10/40] loss=0.929887 avg=0.843125 VRAM=38.9GiB | 45.9% done | ETA(epoch): 690s + [Epoch 229][20/40] loss=0.785561 avg=0.844206 VRAM=38.8GiB | 45.9% done | ETA(epoch): 460s + [Epoch 229][30/40] loss=0.826610 avg=0.844938 VRAM=38.9GiB | 46.0% done | ETA(epoch): 230s + [Epoch 229][40/40] loss=0.934166 avg=0.846926 VRAM=38.8GiB | 46.0% done | ETA(epoch): 0s + Train loss: 0.846926 (920.5s) ETA: 4512min + Val loss: 0.838931 [t_0.0-0.2=1.0871 t_0.2-0.4=0.9959 t_0.4-0.6=0.8429 t_0.6-0.8=0.7115 t_0.8-1.0=0.6311] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0229 + Deleted old checkpoint: checkpoint_epoch_0226 +[MEM @ epoch 229 end] RAM: 18.2/188.4 GiB (9.6%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 230/499 (46% done) --- + [Epoch 230][10/40] loss=0.876578 avg=0.865052 VRAM=38.9GiB | 46.1% done | ETA(epoch): 689s + [Epoch 230][20/40] loss=0.859891 avg=0.856843 VRAM=38.8GiB | 46.1% done | ETA(epoch): 460s + [Epoch 230][30/40] loss=0.806373 avg=0.859470 VRAM=38.9GiB | 46.2% done | ETA(epoch): 230s + [Epoch 230][40/40] loss=0.911582 avg=0.856941 VRAM=38.8GiB | 46.2% done | ETA(epoch): 0s + Train loss: 0.856941 (919.9s) ETA: 4495min + Val loss: 0.865503 [t_0.0-0.2=1.0602 t_0.2-0.4=1.0282 t_0.4-0.6=0.8458 t_0.6-0.8=0.7119 t_0.8-1.0=0.6455] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0230 + Deleted old checkpoint: checkpoint_epoch_0227 +[MEM @ epoch 230 end] RAM: 18.2/188.4 GiB (9.7%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 231/499 (46% done) --- + [Epoch 231][10/40] loss=0.879974 avg=0.883890 VRAM=38.9GiB | 46.2% done | ETA(epoch): 690s + [Epoch 231][20/40] loss=0.811601 avg=0.861743 VRAM=38.8GiB | 46.3% done | ETA(epoch): 460s + [Epoch 231][30/40] loss=0.884608 avg=0.854588 VRAM=38.9GiB | 46.4% done | ETA(epoch): 230s + [Epoch 231][40/40] loss=0.785839 avg=0.855102 VRAM=38.8GiB | 46.4% done | ETA(epoch): 0s + Train loss: 0.855102 (920.0s) ETA: 4478min + Val loss: 0.855606 [t_0.0-0.2=1.0765 t_0.2-0.4=1.0021 t_0.4-0.6=0.8222 t_0.6-0.8=0.7195 t_0.8-1.0=0.6311] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0231 + Deleted old checkpoint: checkpoint_epoch_0228 +[MEM @ epoch 231 end] RAM: 18.2/188.4 GiB (9.7%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 232/499 (46% done) --- + [Epoch 232][10/40] loss=0.797874 avg=0.808713 VRAM=38.9GiB | 46.5% done | ETA(epoch): 690s + [Epoch 232][20/40] loss=0.812237 avg=0.823544 VRAM=38.8GiB | 46.5% done | ETA(epoch): 460s + [Epoch 232][30/40] loss=0.880899 avg=0.827064 VRAM=38.9GiB | 46.6% done | ETA(epoch): 230s + [Epoch 232][40/40] loss=0.851934 avg=0.843092 VRAM=38.8GiB | 46.6% done | ETA(epoch): 0s + Train loss: 0.843092 (920.5s) ETA: 4461min + Val loss: 0.839994 [t_0.0-0.2=1.0752 t_0.2-0.4=1.0153 t_0.4-0.6=0.8424 t_0.6-0.8=0.7095 t_0.8-1.0=0.6440] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0232 + Deleted old checkpoint: checkpoint_epoch_0229 +[MEM @ epoch 232 end] RAM: 18.2/188.4 GiB (9.7%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 233/499 (47% done) --- + [Epoch 233][10/40] loss=0.845483 avg=0.838279 VRAM=38.9GiB | 46.7% done | ETA(epoch): 691s + [Epoch 233][20/40] loss=0.871474 avg=0.848009 VRAM=38.8GiB | 46.7% done | ETA(epoch): 460s + [Epoch 233][30/40] loss=0.831360 avg=0.847782 VRAM=38.9GiB | 46.8% done | ETA(epoch): 230s + [Epoch 233][40/40] loss=0.833524 avg=0.847480 VRAM=38.8GiB | 46.8% done | ETA(epoch): 0s + Train loss: 0.847480 (920.9s) ETA: 4444min + Val loss: 0.845392 [t_0.0-0.2=1.0634 t_0.2-0.4=0.9799 t_0.4-0.6=0.8468 t_0.6-0.8=0.7128 t_0.8-1.0=0.6366] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0233 + Deleted old checkpoint: checkpoint_epoch_0230 +[MEM @ epoch 233 end] RAM: 18.2/188.4 GiB (9.7%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 234/499 (47% done) --- + [Epoch 234][10/40] loss=0.943981 avg=0.855536 VRAM=38.9GiB | 46.9% done | ETA(epoch): 690s + [Epoch 234][20/40] loss=0.869521 avg=0.851085 VRAM=38.8GiB | 46.9% done | ETA(epoch): 460s + [Epoch 234][30/40] loss=0.823252 avg=0.851714 VRAM=38.9GiB | 46.9% done | ETA(epoch): 230s + [Epoch 234][40/40] loss=0.921195 avg=0.854552 VRAM=38.8GiB | 47.0% done | ETA(epoch): 0s + Train loss: 0.854552 (920.4s) ETA: 4427min + Val loss: 0.878202 [t_0.0-0.2=1.0966 t_0.2-0.4=1.0015 t_0.4-0.6=0.8366 t_0.6-0.8=0.7194 t_0.8-1.0=0.6345] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0234 + Deleted old checkpoint: checkpoint_epoch_0231 +[MEM @ epoch 234 end] RAM: 18.2/188.4 GiB (9.7%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 235/499 (47% done) --- + [Epoch 235][10/40] loss=0.785781 avg=0.841834 VRAM=38.9GiB | 47.0% done | ETA(epoch): 690s + [Epoch 235][20/40] loss=0.965362 avg=0.840431 VRAM=38.8GiB | 47.1% done | ETA(epoch): 460s + [Epoch 235][30/40] loss=0.842651 avg=0.847285 VRAM=38.9GiB | 47.1% done | ETA(epoch): 230s + [Epoch 235][40/40] loss=0.829427 avg=0.853731 VRAM=38.8GiB | 47.2% done | ETA(epoch): 0s + Train loss: 0.853731 (920.6s) ETA: 4410min + Val loss: 0.885904 [t_0.0-0.2=1.0588 t_0.2-0.4=1.0043 t_0.4-0.6=0.8288 t_0.6-0.8=0.7050 t_0.8-1.0=0.6307] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0235 + Deleted old checkpoint: checkpoint_epoch_0232 +[MEM @ epoch 235 end] RAM: 18.2/188.4 GiB (9.7%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 236/499 (47% done) --- + [Epoch 236][10/40] loss=0.850551 avg=0.878668 VRAM=38.9GiB | 47.2% done | ETA(epoch): 690s + [Epoch 236][20/40] loss=0.801765 avg=0.853182 VRAM=38.8GiB | 47.3% done | ETA(epoch): 460s + [Epoch 236][30/40] loss=0.770514 avg=0.843625 VRAM=38.9GiB | 47.3% done | ETA(epoch): 230s + [Epoch 236][40/40] loss=0.837005 avg=0.838761 VRAM=38.8GiB | 47.4% done | ETA(epoch): 0s + Train loss: 0.838761 (919.9s) ETA: 4393min + Val loss: 0.863689 [t_0.0-0.2=1.0744 t_0.2-0.4=0.9888 t_0.4-0.6=0.8841 t_0.6-0.8=0.7146 t_0.8-1.0=0.6304] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0236 + Deleted old checkpoint: checkpoint_epoch_0233 +[MEM @ epoch 236 end] RAM: 18.2/188.4 GiB (9.6%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 237/499 (47% done) --- + [Epoch 237][10/40] loss=0.786946 avg=0.849351 VRAM=38.9GiB | 47.4% done | ETA(epoch): 691s + [Epoch 237][20/40] loss=0.851011 avg=0.860030 VRAM=38.8GiB | 47.5% done | ETA(epoch): 461s + [Epoch 237][30/40] loss=0.842442 avg=0.853693 VRAM=38.9GiB | 47.5% done | ETA(epoch): 230s + [Epoch 237][40/40] loss=0.726182 avg=0.847719 VRAM=38.8GiB | 47.6% done | ETA(epoch): 0s + Train loss: 0.847719 (921.2s) ETA: 4376min + Val loss: 0.812170 [t_0.0-0.2=1.0636 t_0.2-0.4=0.9923 t_0.4-0.6=0.8024 t_0.6-0.8=0.7180 t_0.8-1.0=0.6420] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0237 (BEST) + Deleted old checkpoint: checkpoint_epoch_0221 + Deleted old checkpoint: checkpoint_epoch_0234 +[MEM @ epoch 237 end] RAM: 18.2/188.4 GiB (9.6%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 238/499 (48% done) --- + [Epoch 238][10/40] loss=0.885494 avg=0.848730 VRAM=38.9GiB | 47.6% done | ETA(epoch): 690s + [Epoch 238][20/40] loss=0.784068 avg=0.839022 VRAM=38.8GiB | 47.7% done | ETA(epoch): 460s + [Epoch 238][30/40] loss=0.869839 avg=0.838206 VRAM=38.9GiB | 47.8% done | ETA(epoch): 230s + [Epoch 238][40/40] loss=0.891312 avg=0.849320 VRAM=38.8GiB | 47.8% done | ETA(epoch): 0s + Train loss: 0.849320 (920.4s) ETA: 4359min + Val loss: 0.846092 [t_0.0-0.2=1.0597 t_0.2-0.4=0.9906 t_0.4-0.6=0.8440 t_0.6-0.8=0.7097 t_0.8-1.0=0.6297] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0238 + Deleted old checkpoint: checkpoint_epoch_0235 +[MEM @ epoch 238 end] RAM: 18.2/188.4 GiB (9.6%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 239/499 (48% done) --- + [Epoch 239][10/40] loss=0.843982 avg=0.837529 VRAM=38.9GiB | 47.9% done | ETA(epoch): 691s + [Epoch 239][20/40] loss=0.841176 avg=0.852926 VRAM=38.8GiB | 47.9% done | ETA(epoch): 461s + [Epoch 239][30/40] loss=0.936632 avg=0.850630 VRAM=38.9GiB | 47.9% done | ETA(epoch): 230s + [Epoch 239][40/40] loss=0.814261 avg=0.843986 VRAM=38.8GiB | 48.0% done | ETA(epoch): 0s + Train loss: 0.843986 (921.7s) ETA: 4342min + Val loss: 0.858198 [t_0.0-0.2=1.0807 t_0.2-0.4=1.0129 t_0.4-0.6=0.8415 t_0.6-0.8=0.7209 t_0.8-1.0=0.6444] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0239 + Deleted old checkpoint: checkpoint_epoch_0236 +[MEM @ epoch 239 end] RAM: 18.2/188.4 GiB (9.7%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 240/499 (48% done) --- + [Epoch 240][10/40] loss=0.885065 avg=0.835049 VRAM=38.9GiB | 48.0% done | ETA(epoch): 691s + [Epoch 240][20/40] loss=0.865588 avg=0.841683 VRAM=38.8GiB | 48.1% done | ETA(epoch): 460s + [Epoch 240][30/40] loss=0.791248 avg=0.851758 VRAM=38.9GiB | 48.1% done | ETA(epoch): 230s + [Epoch 240][40/40] loss=0.795923 avg=0.852467 VRAM=38.8GiB | 48.2% done | ETA(epoch): 0s + Train loss: 0.852467 (920.9s) ETA: 4325min + Val loss: 0.821676 [t_0.0-0.2=1.0630 t_0.2-0.4=0.9897 t_0.4-0.6=0.8475 t_0.6-0.8=0.7269 t_0.8-1.0=0.6344] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0240 +[MEM @ epoch 240 end] RAM: 18.3/188.4 GiB (9.7%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 241/499 (48% done) --- + [Epoch 241][10/40] loss=0.911960 avg=0.911394 VRAM=38.9GiB | 48.2% done | ETA(epoch): 691s + [Epoch 241][20/40] loss=0.878351 avg=0.892987 VRAM=38.8GiB | 48.3% done | ETA(epoch): 461s + [Epoch 241][30/40] loss=0.825527 avg=0.872806 VRAM=38.9GiB | 48.4% done | ETA(epoch): 230s + [Epoch 241][40/40] loss=0.825447 avg=0.866712 VRAM=38.8GiB | 48.4% done | ETA(epoch): 0s + Train loss: 0.866712 (921.0s) ETA: 4308min + Val loss: 0.880156 [t_0.0-0.2=1.0612 t_0.2-0.4=0.9880 t_0.4-0.6=0.8040 t_0.6-0.8=0.7144 t_0.8-1.0=0.6404] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0241 + Deleted old checkpoint: checkpoint_epoch_0238 +[MEM @ epoch 241 end] RAM: 18.3/188.4 GiB (9.7%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 242/499 (48% done) --- + [Epoch 242][10/40] loss=0.788820 avg=0.843936 VRAM=38.9GiB | 48.4% done | ETA(epoch): 690s + [Epoch 242][20/40] loss=0.884181 avg=0.852106 VRAM=38.8GiB | 48.5% done | ETA(epoch): 460s + [Epoch 242][30/40] loss=0.775138 avg=0.862236 VRAM=38.9GiB | 48.5% done | ETA(epoch): 230s + [Epoch 242][40/40] loss=0.906241 avg=0.856983 VRAM=38.8GiB | 48.6% done | ETA(epoch): 0s + Train loss: 0.856983 (921.0s) ETA: 4291min + Val loss: 0.859180 [t_0.0-0.2=1.0895 t_0.2-0.4=1.0029 t_0.4-0.6=0.8398 t_0.6-0.8=0.7213 t_0.8-1.0=0.6350] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0242 + Deleted old checkpoint: checkpoint_epoch_0239 +[MEM @ epoch 242 end] RAM: 18.2/188.4 GiB (9.7%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 243/499 (49% done) --- + [Epoch 243][10/40] loss=0.882557 avg=0.853715 VRAM=38.9GiB | 48.6% done | ETA(epoch): 691s + [Epoch 243][20/40] loss=0.772919 avg=0.859542 VRAM=38.8GiB | 48.7% done | ETA(epoch): 461s + [Epoch 243][30/40] loss=0.844805 avg=0.854325 VRAM=38.9GiB | 48.8% done | ETA(epoch): 230s + [Epoch 243][40/40] loss=0.758447 avg=0.850433 VRAM=38.8GiB | 48.8% done | ETA(epoch): 0s + Train loss: 0.850433 (921.3s) ETA: 4274min + Val loss: 0.841062 [t_0.0-0.2=1.0938 t_0.2-0.4=0.9854 t_0.4-0.6=0.8814 t_0.6-0.8=0.6983 t_0.8-1.0=0.6296] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0243 + Deleted old checkpoint: checkpoint_epoch_0240 +[MEM @ epoch 243 end] RAM: 18.3/188.4 GiB (9.7%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 244/499 (49% done) --- + [Epoch 244][10/40] loss=0.780397 avg=0.837038 VRAM=38.9GiB | 48.9% done | ETA(epoch): 690s + [Epoch 244][20/40] loss=0.904463 avg=0.844246 VRAM=38.8GiB | 48.9% done | ETA(epoch): 460s + [Epoch 244][30/40] loss=0.884549 avg=0.853754 VRAM=38.9GiB | 48.9% done | ETA(epoch): 230s + [Epoch 244][40/40] loss=0.916620 avg=0.860146 VRAM=38.8GiB | 49.0% done | ETA(epoch): 0s + Train loss: 0.860146 (921.0s) ETA: 4258min + Val loss: 0.832261 [t_0.0-0.2=1.0664 t_0.2-0.4=0.9966 t_0.4-0.6=0.8474 t_0.6-0.8=0.7166 t_0.8-1.0=0.6277] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0244 + Deleted old checkpoint: checkpoint_epoch_0241 +[MEM @ epoch 244 end] RAM: 18.3/188.4 GiB (9.7%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 245/499 (49% done) --- + [Epoch 245][10/40] loss=0.976382 avg=0.883401 VRAM=38.9GiB | 49.0% done | ETA(epoch): 690s + [Epoch 245][20/40] loss=0.864059 avg=0.882930 VRAM=38.8GiB | 49.1% done | ETA(epoch): 460s + [Epoch 245][30/40] loss=0.987184 avg=0.876714 VRAM=38.9GiB | 49.1% done | ETA(epoch): 230s + [Epoch 245][40/40] loss=0.886455 avg=0.870093 VRAM=38.8GiB | 49.2% done | ETA(epoch): 0s + Train loss: 0.870093 (920.7s) ETA: 4241min + Val loss: 0.837766 [t_0.0-0.2=1.0778 t_0.2-0.4=1.0029 t_0.4-0.6=0.8413 t_0.6-0.8=0.7204 t_0.8-1.0=0.6348] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0245 + Deleted old checkpoint: checkpoint_epoch_0242 +[MEM @ epoch 245 end] RAM: 18.2/188.4 GiB (9.7%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 246/499 (49% done) --- + [Epoch 246][10/40] loss=0.915715 avg=0.827733 VRAM=38.9GiB | 49.2% done | ETA(epoch): 690s + [Epoch 246][20/40] loss=0.953414 avg=0.847461 VRAM=38.8GiB | 49.3% done | ETA(epoch): 460s + [Epoch 246][30/40] loss=0.730010 avg=0.850648 VRAM=38.9GiB | 49.4% done | ETA(epoch): 230s + [Epoch 246][40/40] loss=0.799083 avg=0.839613 VRAM=38.8GiB | 49.4% done | ETA(epoch): 0s + Train loss: 0.839613 (920.8s) ETA: 4224min + Val loss: 0.863257 [t_0.0-0.2=1.0660 t_0.2-0.4=1.0266 t_0.4-0.6=0.8483 t_0.6-0.8=0.6951 t_0.8-1.0=0.6387] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0246 + Deleted old checkpoint: checkpoint_epoch_0243 +[MEM @ epoch 246 end] RAM: 18.2/188.4 GiB (9.7%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 247/499 (49% done) --- + [Epoch 247][10/40] loss=0.822486 avg=0.813087 VRAM=38.9GiB | 49.5% done | ETA(epoch): 691s + [Epoch 247][20/40] loss=0.880567 avg=0.830686 VRAM=38.8GiB | 49.5% done | ETA(epoch): 460s + [Epoch 247][30/40] loss=0.876856 avg=0.835565 VRAM=38.9GiB | 49.5% done | ETA(epoch): 230s + [Epoch 247][40/40] loss=0.766706 avg=0.831764 VRAM=38.8GiB | 49.6% done | ETA(epoch): 0s + Train loss: 0.831764 (921.0s) ETA: 4207min + Val loss: 0.856820 [t_0.0-0.2=1.0731 t_0.2-0.4=0.9673 t_0.4-0.6=0.9032 t_0.6-0.8=0.7146 t_0.8-1.0=0.6332] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0247 + Deleted old checkpoint: checkpoint_epoch_0244 +[MEM @ epoch 247 end] RAM: 18.2/188.4 GiB (9.7%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 248/499 (50% done) --- + [Epoch 248][10/40] loss=0.872312 avg=0.888409 VRAM=38.9GiB | 49.6% done | ETA(epoch): 691s + [Epoch 248][20/40] loss=0.837816 avg=0.858931 VRAM=38.8GiB | 49.7% done | ETA(epoch): 460s + [Epoch 248][30/40] loss=0.844088 avg=0.860061 VRAM=38.9GiB | 49.8% done | ETA(epoch): 230s + [Epoch 248][40/40] loss=0.883605 avg=0.857838 VRAM=38.8GiB | 49.8% done | ETA(epoch): 0s + Train loss: 0.857838 (920.9s) ETA: 4190min + Val loss: 0.841848 [t_0.0-0.2=1.0646 t_0.2-0.4=1.0132 t_0.4-0.6=0.8271 t_0.6-0.8=0.7031 t_0.8-1.0=0.6413] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0248 + Deleted old checkpoint: checkpoint_epoch_0245 +[MEM @ epoch 248 end] RAM: 18.2/188.4 GiB (9.7%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 249/499 (50% done) --- + [Epoch 249][10/40] loss=0.839760 avg=0.840874 VRAM=38.9GiB | 49.9% done | ETA(epoch): 691s + [Epoch 249][20/40] loss=0.757437 avg=0.835143 VRAM=38.8GiB | 49.9% done | ETA(epoch): 461s + [Epoch 249][30/40] loss=0.801767 avg=0.836132 VRAM=38.9GiB | 50.0% done | ETA(epoch): 230s + [Epoch 249][40/40] loss=0.874051 avg=0.834632 VRAM=38.8GiB | 50.0% done | ETA(epoch): 0s + Train loss: 0.834632 (921.1s) ETA: 4173min + Val loss: 0.856747 [t_0.0-0.2=1.0532 t_0.2-0.4=0.9853 t_0.4-0.6=0.9184 t_0.6-0.8=0.7188 t_0.8-1.0=0.6360] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0249 + Deleted old checkpoint: checkpoint_epoch_0246 +[MEM @ epoch 249 end] RAM: 18.2/188.4 GiB (9.7%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 250/499 (50% done) --- + [MilestoneVis] train_0 step 10000 ✓ + [MilestoneGrid] train_0 → output/lora_rank_128_mlp_H200/milestone_vis/milestone_grid_train_0.png + [MilestoneVis] train_1 step 10000 ✓ + [MilestoneGrid] train_1 → output/lora_rank_128_mlp_H200/milestone_vis/milestone_grid_train_1.png + [MilestoneVis] val_0 step 10000 ✓ + [MilestoneGrid] val_0 → output/lora_rank_128_mlp_H200/milestone_vis/milestone_grid_val_0.png + [MilestoneVis] val_1 step 10000 ✓ + [MilestoneGrid] val_1 → output/lora_rank_128_mlp_H200/milestone_vis/milestone_grid_val_1.png + [MilestoneVis] test_0 step 10000 ✓ + [MilestoneGrid] test_0 → output/lora_rank_128_mlp_H200/milestone_vis/milestone_grid_test_0.png + [MilestoneVis] test_1 step 10000 ✓ + [MilestoneGrid] test_1 → output/lora_rank_128_mlp_H200/milestone_vis/milestone_grid_test_1.png + [MilestoneVis] test_2 step 10000 ✓ + [MilestoneGrid] test_2 → output/lora_rank_128_mlp_H200/milestone_vis/milestone_grid_test_2.png + [MilestoneVis] test_3 step 10000 ✓ + [MilestoneGrid] test_3 → output/lora_rank_128_mlp_H200/milestone_vis/milestone_grid_test_3.png + [MilestoneVis] test_4 step 10000 ✓ + [MilestoneGrid] test_4 → output/lora_rank_128_mlp_H200/milestone_vis/milestone_grid_test_4.png + [MilestoneVis] test_5 step 10000 ✓ + [MilestoneGrid] test_5 → output/lora_rank_128_mlp_H200/milestone_vis/milestone_grid_test_5.png + [Epoch 250][10/40] loss=0.868997 avg=0.846959 VRAM=38.9GiB | 50.0% done | ETA(epoch): 2944s + [Epoch 250][20/40] loss=0.812395 avg=0.854416 VRAM=38.8GiB | 50.1% done | ETA(epoch): 1212s + [Epoch 250][30/40] loss=0.879211 avg=0.853301 VRAM=38.9GiB | 50.1% done | ETA(epoch): 481s + [Epoch 250][40/40] loss=0.768351 avg=0.860480 VRAM=38.8GiB | 50.2% done | ETA(epoch): 0s + Train loss: 0.860480 (1672.2s) ETA: 4168min + Val loss: 0.869657 [t_0.0-0.2=1.0754 t_0.2-0.4=1.0060 t_0.4-0.6=0.8506 t_0.6-0.8=0.7195 t_0.8-1.0=0.6370] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0250 + Deleted old checkpoint: checkpoint_epoch_0247 +[MEM @ epoch 250 end] RAM: 18.8/188.4 GiB (10.0%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 251/499 (50% done) --- + [Epoch 251][10/40] loss=0.826980 avg=0.846836 VRAM=38.9GiB | 50.2% done | ETA(epoch): 691s + [Epoch 251][20/40] loss=0.712084 avg=0.837804 VRAM=38.8GiB | 50.3% done | ETA(epoch): 461s + [Epoch 251][30/40] loss=0.864280 avg=0.847978 VRAM=38.9GiB | 50.3% done | ETA(epoch): 230s + [Epoch 251][40/40] loss=0.857494 avg=0.849213 VRAM=38.8GiB | 50.4% done | ETA(epoch): 0s + Train loss: 0.849213 (921.4s) ETA: 4151min + Val loss: 0.843733 [t_0.0-0.2=1.0743 t_0.2-0.4=0.9989 t_0.4-0.6=0.8588 t_0.6-0.8=0.6815 t_0.8-1.0=0.6322] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0251 + Deleted old checkpoint: checkpoint_epoch_0248 +[MEM @ epoch 251 end] RAM: 18.9/188.4 GiB (10.0%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 252/499 (50% done) --- + [Epoch 252][10/40] loss=0.871083 avg=0.887450 VRAM=38.9GiB | 50.4% done | ETA(epoch): 690s + [Epoch 252][20/40] loss=0.757345 avg=0.852013 VRAM=38.8GiB | 50.5% done | ETA(epoch): 460s + [Epoch 252][30/40] loss=0.755649 avg=0.853106 VRAM=38.9GiB | 50.5% done | ETA(epoch): 230s + [Epoch 252][40/40] loss=0.949577 avg=0.848642 VRAM=38.8GiB | 50.6% done | ETA(epoch): 0s + Train loss: 0.848642 (920.8s) ETA: 4134min + Val loss: 0.823733 [t_0.0-0.2=1.0838 t_0.2-0.4=0.9948 t_0.4-0.6=0.8269 t_0.6-0.8=0.7145 t_0.8-1.0=0.6195] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0252 + Deleted old checkpoint: checkpoint_epoch_0249 +[MEM @ epoch 252 end] RAM: 19.0/188.4 GiB (10.1%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 253/499 (51% done) --- + [Epoch 253][10/40] loss=0.780435 avg=0.843330 VRAM=38.9GiB | 50.6% done | ETA(epoch): 691s + [Epoch 253][20/40] loss=0.877315 avg=0.836576 VRAM=38.8GiB | 50.7% done | ETA(epoch): 460s + [Epoch 253][30/40] loss=0.753002 avg=0.839947 VRAM=38.9GiB | 50.7% done | ETA(epoch): 230s + [Epoch 253][40/40] loss=0.874371 avg=0.852062 VRAM=38.8GiB | 50.8% done | ETA(epoch): 0s + Train loss: 0.852062 (920.5s) ETA: 4117min + Val loss: 0.889340 [t_0.0-0.2=1.0577 t_0.2-0.4=1.0287 t_0.4-0.6=0.8401 t_0.6-0.8=0.7156 t_0.8-1.0=0.6595] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0253 + Deleted old checkpoint: checkpoint_epoch_0250 +[MEM @ epoch 253 end] RAM: 18.9/188.4 GiB (10.0%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 254/499 (51% done) --- + [Epoch 254][10/40] loss=0.838563 avg=0.862151 VRAM=38.9GiB | 50.8% done | ETA(epoch): 690s + [Epoch 254][20/40] loss=0.985767 avg=0.870102 VRAM=38.8GiB | 50.9% done | ETA(epoch): 460s + [Epoch 254][30/40] loss=0.821668 avg=0.863915 VRAM=38.9GiB | 50.9% done | ETA(epoch): 230s + [Epoch 254][40/40] loss=0.799157 avg=0.860044 VRAM=38.8GiB | 51.0% done | ETA(epoch): 0s + Train loss: 0.860044 (919.7s) ETA: 4100min + Val loss: 0.863890 [t_0.0-0.2=1.0773 t_0.2-0.4=0.9751 t_0.4-0.6=0.8022 t_0.6-0.8=0.7076 t_0.8-1.0=0.6341] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0254 + Deleted old checkpoint: checkpoint_epoch_0251 +[MEM @ epoch 254 end] RAM: 18.9/188.4 GiB (10.1%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 255/499 (51% done) --- + [Epoch 255][10/40] loss=0.849092 avg=0.844616 VRAM=38.9GiB | 51.0% done | ETA(epoch): 690s + [Epoch 255][20/40] loss=0.952950 avg=0.850666 VRAM=38.8GiB | 51.1% done | ETA(epoch): 460s + [Epoch 255][30/40] loss=0.798017 avg=0.849784 VRAM=38.9GiB | 51.1% done | ETA(epoch): 230s + [Epoch 255][40/40] loss=0.844383 avg=0.838750 VRAM=38.8GiB | 51.2% done | ETA(epoch): 0s + Train loss: 0.838750 (920.2s) ETA: 4083min + Val loss: 0.841095 [t_0.0-0.2=1.0579 t_0.2-0.4=0.9670 t_0.4-0.6=0.8696 t_0.6-0.8=0.7121 t_0.8-1.0=0.6295] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0255 + Deleted old checkpoint: checkpoint_epoch_0252 +[MEM @ epoch 255 end] RAM: 18.8/188.4 GiB (10.0%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 256/499 (51% done) --- + [Epoch 256][10/40] loss=0.981141 avg=0.886239 VRAM=38.9GiB | 51.2% done | ETA(epoch): 690s + [Epoch 256][20/40] loss=0.947971 avg=0.872226 VRAM=38.8GiB | 51.3% done | ETA(epoch): 460s + [Epoch 256][30/40] loss=0.791784 avg=0.877775 VRAM=38.9GiB | 51.3% done | ETA(epoch): 230s + [Epoch 256][40/40] loss=0.802106 avg=0.880271 VRAM=38.8GiB | 51.4% done | ETA(epoch): 0s + Train loss: 0.880271 (920.5s) ETA: 4066min + Val loss: 0.812926 [t_0.0-0.2=1.0575 t_0.2-0.4=1.0076 t_0.4-0.6=0.8313 t_0.6-0.8=0.7235 t_0.8-1.0=0.6320] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0256 + Deleted old checkpoint: checkpoint_epoch_0253 +[MEM @ epoch 256 end] RAM: 18.8/188.4 GiB (10.0%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 257/499 (51% done) --- + [Epoch 257][10/40] loss=0.896300 avg=0.850498 VRAM=38.9GiB | 51.4% done | ETA(epoch): 691s + [Epoch 257][20/40] loss=0.932379 avg=0.866167 VRAM=38.8GiB | 51.5% done | ETA(epoch): 460s + [Epoch 257][30/40] loss=0.932798 avg=0.856009 VRAM=38.9GiB | 51.5% done | ETA(epoch): 230s + [Epoch 257][40/40] loss=0.836413 avg=0.854426 VRAM=38.8GiB | 51.6% done | ETA(epoch): 0s + Train loss: 0.854426 (920.8s) ETA: 4049min + Val loss: 0.875288 [t_0.0-0.2=1.0638 t_0.2-0.4=0.9923 t_0.4-0.6=0.8415 t_0.6-0.8=0.7196 t_0.8-1.0=0.6330] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0257 + Deleted old checkpoint: checkpoint_epoch_0254 +[MEM @ epoch 257 end] RAM: 18.8/188.4 GiB (10.0%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 258/499 (52% done) --- + [Epoch 258][10/40] loss=0.778256 avg=0.848505 VRAM=38.9GiB | 51.6% done | ETA(epoch): 690s + [Epoch 258][20/40] loss=0.882181 avg=0.871210 VRAM=38.8GiB | 51.7% done | ETA(epoch): 460s + [Epoch 258][30/40] loss=0.872505 avg=0.864454 VRAM=38.9GiB | 51.7% done | ETA(epoch): 230s + [Epoch 258][40/40] loss=0.905278 avg=0.852664 VRAM=38.8GiB | 51.8% done | ETA(epoch): 0s + Train loss: 0.852664 (920.5s) ETA: 4032min + Val loss: 0.829912 [t_0.0-0.2=1.0690 t_0.2-0.4=0.9935 t_0.4-0.6=0.8365 t_0.6-0.8=0.7286 t_0.8-1.0=0.6347] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0258 + Deleted old checkpoint: checkpoint_epoch_0255 +[MEM @ epoch 258 end] RAM: 18.8/188.4 GiB (10.0%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 259/499 (52% done) --- + [Epoch 259][10/40] loss=0.888651 avg=0.856734 VRAM=38.9GiB | 51.8% done | ETA(epoch): 691s + [Epoch 259][20/40] loss=0.904630 avg=0.862112 VRAM=38.8GiB | 51.9% done | ETA(epoch): 461s + [Epoch 259][30/40] loss=0.891096 avg=0.856872 VRAM=38.9GiB | 51.9% done | ETA(epoch): 230s + [Epoch 259][40/40] loss=0.802556 avg=0.855816 VRAM=38.8GiB | 52.0% done | ETA(epoch): 0s + Train loss: 0.855816 (921.1s) ETA: 4015min + Val loss: 0.830432 [t_0.0-0.2=1.0717 t_0.2-0.4=0.9597 t_0.4-0.6=0.8120 t_0.6-0.8=0.7237 t_0.8-1.0=0.6330] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0259 + Deleted old checkpoint: checkpoint_epoch_0256 +[MEM @ epoch 259 end] RAM: 18.8/188.4 GiB (10.0%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 260/499 (52% done) --- + [Epoch 260][10/40] loss=0.879443 avg=0.861674 VRAM=38.9GiB | 52.0% done | ETA(epoch): 691s + [Epoch 260][20/40] loss=0.951544 avg=0.848613 VRAM=38.8GiB | 52.1% done | ETA(epoch): 461s + [Epoch 260][30/40] loss=0.812677 avg=0.854595 VRAM=38.9GiB | 52.1% done | ETA(epoch): 230s + [Epoch 260][40/40] loss=0.894125 avg=0.850567 VRAM=38.8GiB | 52.2% done | ETA(epoch): 0s + Train loss: 0.850567 (921.3s) ETA: 3998min + Val loss: 0.819406 [t_0.0-0.2=1.0790 t_0.2-0.4=0.9876 t_0.4-0.6=0.8485 t_0.6-0.8=0.6946 t_0.8-1.0=0.6319] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0260 + Deleted old checkpoint: checkpoint_epoch_0257 +[MEM @ epoch 260 end] RAM: 18.9/188.4 GiB (10.0%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 261/499 (52% done) --- + [Epoch 261][10/40] loss=0.841875 avg=0.859202 VRAM=38.9GiB | 52.2% done | ETA(epoch): 690s + [Epoch 261][20/40] loss=0.884558 avg=0.863726 VRAM=38.8GiB | 52.3% done | ETA(epoch): 460s + [Epoch 261][30/40] loss=0.824650 avg=0.859752 VRAM=38.9GiB | 52.3% done | ETA(epoch): 230s + [Epoch 261][40/40] loss=0.928013 avg=0.862695 VRAM=38.8GiB | 52.4% done | ETA(epoch): 0s + Train loss: 0.862695 (921.0s) ETA: 3981min + Val loss: 0.858647 [t_0.0-0.2=1.0784 t_0.2-0.4=1.0113 t_0.4-0.6=0.8590 t_0.6-0.8=0.6843 t_0.8-1.0=0.6457] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0261 + Deleted old checkpoint: checkpoint_epoch_0258 +[MEM @ epoch 261 end] RAM: 18.8/188.4 GiB (10.0%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 262/499 (52% done) --- + [Epoch 262][10/40] loss=0.844124 avg=0.854640 VRAM=38.9GiB | 52.4% done | ETA(epoch): 690s + [Epoch 262][20/40] loss=0.871426 avg=0.851626 VRAM=38.8GiB | 52.5% done | ETA(epoch): 460s + [Epoch 262][30/40] loss=0.812331 avg=0.848167 VRAM=38.9GiB | 52.5% done | ETA(epoch): 230s + [Epoch 262][40/40] loss=0.783342 avg=0.844652 VRAM=38.8GiB | 52.6% done | ETA(epoch): 0s + Train loss: 0.844652 (920.8s) ETA: 3964min + Val loss: 0.859297 [t_0.0-0.2=1.0675 t_0.2-0.4=1.0024 t_0.4-0.6=0.8572 t_0.6-0.8=0.7336 t_0.8-1.0=0.6275] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0262 + Deleted old checkpoint: checkpoint_epoch_0259 +[MEM @ epoch 262 end] RAM: 18.9/188.4 GiB (10.0%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 263/499 (53% done) --- + [Epoch 263][10/40] loss=0.804649 avg=0.809055 VRAM=38.9GiB | 52.6% done | ETA(epoch): 690s + [Epoch 263][20/40] loss=0.779958 avg=0.830860 VRAM=38.8GiB | 52.7% done | ETA(epoch): 460s + [Epoch 263][30/40] loss=0.757219 avg=0.836663 VRAM=38.9GiB | 52.8% done | ETA(epoch): 230s + [Epoch 263][40/40] loss=0.827060 avg=0.838265 VRAM=38.8GiB | 52.8% done | ETA(epoch): 0s + Train loss: 0.838265 (920.9s) ETA: 3947min + Val loss: 0.863035 [t_0.0-0.2=1.0595 t_0.2-0.4=1.0167 t_0.4-0.6=0.8638 t_0.6-0.8=0.7202 t_0.8-1.0=0.6477] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0263 + Deleted old checkpoint: checkpoint_epoch_0260 +[MEM @ epoch 263 end] RAM: 18.9/188.4 GiB (10.1%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 264/499 (53% done) --- + [Epoch 264][10/40] loss=0.918469 avg=0.908535 VRAM=38.9GiB | 52.8% done | ETA(epoch): 691s + [Epoch 264][20/40] loss=0.930611 avg=0.895810 VRAM=38.8GiB | 52.9% done | ETA(epoch): 461s + [Epoch 264][30/40] loss=0.896958 avg=0.881865 VRAM=38.9GiB | 52.9% done | ETA(epoch): 230s + [Epoch 264][40/40] loss=0.916840 avg=0.879490 VRAM=38.8GiB | 53.0% done | ETA(epoch): 0s + Train loss: 0.879490 (921.0s) ETA: 3930min + Val loss: 0.860751 [t_0.0-0.2=1.0734 t_0.2-0.4=0.9907 t_0.4-0.6=0.8187 t_0.6-0.8=0.7258 t_0.8-1.0=0.6283] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0264 + Deleted old checkpoint: checkpoint_epoch_0261 +[MEM @ epoch 264 end] RAM: 18.8/188.4 GiB (10.0%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 265/499 (53% done) --- + [Epoch 265][10/40] loss=0.778642 avg=0.839639 VRAM=38.9GiB | 53.0% done | ETA(epoch): 690s + [Epoch 265][20/40] loss=0.869178 avg=0.837413 VRAM=38.8GiB | 53.1% done | ETA(epoch): 460s + [Epoch 265][30/40] loss=0.752551 avg=0.847390 VRAM=38.9GiB | 53.1% done | ETA(epoch): 230s + [Epoch 265][40/40] loss=0.762611 avg=0.844861 VRAM=38.8GiB | 53.2% done | ETA(epoch): 0s + Train loss: 0.844861 (920.8s) ETA: 3913min + Val loss: 0.862308 [t_0.0-0.2=1.0652 t_0.2-0.4=0.9919 t_0.4-0.6=0.8398 t_0.6-0.8=0.7019 t_0.8-1.0=0.6376] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0265 + Deleted old checkpoint: checkpoint_epoch_0262 +[MEM @ epoch 265 end] RAM: 18.9/188.4 GiB (10.0%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 266/499 (53% done) --- + [Epoch 266][10/40] loss=0.882946 avg=0.852889 VRAM=38.9GiB | 53.2% done | ETA(epoch): 690s + [Epoch 266][20/40] loss=0.876937 avg=0.842458 VRAM=38.8GiB | 53.3% done | ETA(epoch): 460s + [Epoch 266][30/40] loss=0.875005 avg=0.850988 VRAM=38.9GiB | 53.3% done | ETA(epoch): 230s + [Epoch 266][40/40] loss=0.901051 avg=0.847309 VRAM=38.8GiB | 53.4% done | ETA(epoch): 0s + Train loss: 0.847309 (920.7s) ETA: 3896min + Val loss: 0.867164 [t_0.0-0.2=1.0475 t_0.2-0.4=1.0092 t_0.4-0.6=0.8327 t_0.6-0.8=0.7231 t_0.8-1.0=0.6458] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0266 + Deleted old checkpoint: checkpoint_epoch_0263 +[MEM @ epoch 266 end] RAM: 18.8/188.4 GiB (10.0%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 267/499 (53% done) --- + [Epoch 267][10/40] loss=0.803004 avg=0.866547 VRAM=38.9GiB | 53.4% done | ETA(epoch): 690s + [Epoch 267][20/40] loss=0.940381 avg=0.850469 VRAM=38.8GiB | 53.5% done | ETA(epoch): 460s + [Epoch 267][30/40] loss=0.816241 avg=0.845438 VRAM=38.9GiB | 53.5% done | ETA(epoch): 230s + [Epoch 267][40/40] loss=0.950698 avg=0.844911 VRAM=38.8GiB | 53.6% done | ETA(epoch): 0s + Train loss: 0.844911 (920.7s) ETA: 3879min + Val loss: 0.847740 [t_0.0-0.2=1.0852 t_0.2-0.4=0.9893 t_0.4-0.6=0.8111 t_0.6-0.8=0.7303 t_0.8-1.0=0.6212] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0267 + Deleted old checkpoint: checkpoint_epoch_0264 +[MEM @ epoch 267 end] RAM: 18.8/188.4 GiB (10.0%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 268/499 (54% done) --- + [Epoch 268][10/40] loss=0.929049 avg=0.856136 VRAM=38.9GiB | 53.6% done | ETA(epoch): 691s + [Epoch 268][20/40] loss=0.824621 avg=0.842331 VRAM=38.8GiB | 53.7% done | ETA(epoch): 461s + [Epoch 268][30/40] loss=0.921832 avg=0.852209 VRAM=38.9GiB | 53.8% done | ETA(epoch): 230s + [Epoch 268][40/40] loss=0.750255 avg=0.845099 VRAM=38.8GiB | 53.8% done | ETA(epoch): 0s + Train loss: 0.845099 (920.8s) ETA: 3862min + Val loss: 0.870654 [t_0.0-0.2=1.0754 t_0.2-0.4=0.9659 t_0.4-0.6=0.8280 t_0.6-0.8=0.7102 t_0.8-1.0=0.6503] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0268 + Deleted old checkpoint: checkpoint_epoch_0265 +[MEM @ epoch 268 end] RAM: 18.8/188.4 GiB (10.0%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 269/499 (54% done) --- + [Epoch 269][10/40] loss=0.745486 avg=0.850431 VRAM=38.9GiB | 53.8% done | ETA(epoch): 691s + [Epoch 269][20/40] loss=0.845316 avg=0.849237 VRAM=38.8GiB | 53.9% done | ETA(epoch): 460s + [Epoch 269][30/40] loss=0.769064 avg=0.853557 VRAM=38.9GiB | 53.9% done | ETA(epoch): 230s + [Epoch 269][40/40] loss=0.836922 avg=0.850357 VRAM=38.8GiB | 54.0% done | ETA(epoch): 0s + Train loss: 0.850357 (921.3s) ETA: 3845min + Val loss: 0.870745 [t_0.0-0.2=1.0731 t_0.2-0.4=0.9993 t_0.4-0.6=0.8654 t_0.6-0.8=0.7031 t_0.8-1.0=0.6446] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0269 + Deleted old checkpoint: checkpoint_epoch_0266 +[MEM @ epoch 269 end] RAM: 18.9/188.4 GiB (10.0%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 270/499 (54% done) --- + [Epoch 270][10/40] loss=0.891375 avg=0.820665 VRAM=38.9GiB | 54.0% done | ETA(epoch): 691s + [Epoch 270][20/40] loss=0.826802 avg=0.828647 VRAM=38.8GiB | 54.1% done | ETA(epoch): 460s + [Epoch 270][30/40] loss=0.806021 avg=0.838207 VRAM=38.9GiB | 54.1% done | ETA(epoch): 230s + [Epoch 270][40/40] loss=0.793039 avg=0.842965 VRAM=38.8GiB | 54.2% done | ETA(epoch): 0s + Train loss: 0.842965 (921.0s) ETA: 3828min + Val loss: 0.893814 [t_0.0-0.2=1.0632 t_0.2-0.4=1.0043 t_0.4-0.6=0.8311 t_0.6-0.8=0.7236 t_0.8-1.0=0.6563] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0270 + Deleted old checkpoint: checkpoint_epoch_0267 +[MEM @ epoch 270 end] RAM: 18.8/188.4 GiB (10.0%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 271/499 (54% done) --- + [Epoch 271][10/40] loss=0.848648 avg=0.860071 VRAM=38.9GiB | 54.2% done | ETA(epoch): 691s + [Epoch 271][20/40] loss=0.928485 avg=0.862389 VRAM=38.8GiB | 54.3% done | ETA(epoch): 461s + [Epoch 271][30/40] loss=0.937934 avg=0.865283 VRAM=38.9GiB | 54.4% done | ETA(epoch): 230s + [Epoch 271][40/40] loss=0.846655 avg=0.863022 VRAM=38.8GiB | 54.4% done | ETA(epoch): 0s + Train loss: 0.863022 (921.3s) ETA: 3811min + Val loss: 0.871289 [t_0.0-0.2=1.0752 t_0.2-0.4=0.9816 t_0.4-0.6=0.8168 t_0.6-0.8=0.7408 t_0.8-1.0=0.6476] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0271 + Deleted old checkpoint: checkpoint_epoch_0268 +[MEM @ epoch 271 end] RAM: 18.8/188.4 GiB (10.0%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 272/499 (54% done) --- + [Epoch 272][10/40] loss=0.806129 avg=0.850892 VRAM=38.9GiB | 54.4% done | ETA(epoch): 691s + [Epoch 272][20/40] loss=0.907758 avg=0.851023 VRAM=38.8GiB | 54.5% done | ETA(epoch): 460s + [Epoch 272][30/40] loss=0.773645 avg=0.852419 VRAM=38.9GiB | 54.5% done | ETA(epoch): 230s + [Epoch 272][40/40] loss=0.848361 avg=0.848868 VRAM=38.8GiB | 54.6% done | ETA(epoch): 0s + Train loss: 0.848868 (920.9s) ETA: 3794min + Val loss: 0.851516 [t_0.0-0.2=1.0648 t_0.2-0.4=0.9569 t_0.4-0.6=0.8377 t_0.6-0.8=0.7293 t_0.8-1.0=0.6201] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0272 + Deleted old checkpoint: checkpoint_epoch_0269 +[MEM @ epoch 272 end] RAM: 18.9/188.4 GiB (10.0%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 273/499 (55% done) --- + [Epoch 273][10/40] loss=0.989885 avg=0.853584 VRAM=38.9GiB | 54.6% done | ETA(epoch): 691s + [Epoch 273][20/40] loss=0.857673 avg=0.852077 VRAM=38.8GiB | 54.7% done | ETA(epoch): 461s + [Epoch 273][30/40] loss=0.934165 avg=0.861156 VRAM=38.9GiB | 54.8% done | ETA(epoch): 230s + [Epoch 273][40/40] loss=0.811234 avg=0.857616 VRAM=38.8GiB | 54.8% done | ETA(epoch): 0s + Train loss: 0.857616 (921.4s) ETA: 3778min + Val loss: 0.887173 [t_0.0-0.2=1.0788 t_0.2-0.4=0.9957 t_0.4-0.6=0.8392 t_0.6-0.8=0.7134 t_0.8-1.0=0.6437] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0273 + Deleted old checkpoint: checkpoint_epoch_0270 +[MEM @ epoch 273 end] RAM: 18.9/188.4 GiB (10.0%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 274/499 (55% done) --- + [Epoch 274][10/40] loss=0.755666 avg=0.854000 VRAM=38.9GiB | 54.9% done | ETA(epoch): 691s + [Epoch 274][20/40] loss=0.771140 avg=0.830657 VRAM=38.8GiB | 54.9% done | ETA(epoch): 460s + [Epoch 274][30/40] loss=0.924737 avg=0.837848 VRAM=38.9GiB | 54.9% done | ETA(epoch): 230s + [Epoch 274][40/40] loss=0.789998 avg=0.833950 VRAM=38.8GiB | 55.0% done | ETA(epoch): 0s + Train loss: 0.833950 (920.9s) ETA: 3761min + Val loss: 0.860475 [t_0.0-0.2=1.0583 t_0.2-0.4=0.9818 t_0.4-0.6=0.8510 t_0.6-0.8=0.7052 t_0.8-1.0=0.6297] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0274 + Deleted old checkpoint: checkpoint_epoch_0271 +[MEM @ epoch 274 end] RAM: 18.9/188.4 GiB (10.0%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 275/499 (55% done) --- + [Epoch 275][10/40] loss=0.872675 avg=0.851480 VRAM=38.9GiB | 55.0% done | ETA(epoch): 691s + [Epoch 275][20/40] loss=0.933203 avg=0.848196 VRAM=38.8GiB | 55.1% done | ETA(epoch): 461s + [Epoch 275][30/40] loss=0.948233 avg=0.861364 VRAM=38.9GiB | 55.1% done | ETA(epoch): 230s + [Epoch 275][40/40] loss=0.796216 avg=0.848361 VRAM=38.8GiB | 55.2% done | ETA(epoch): 0s + Train loss: 0.848361 (920.8s) ETA: 3744min + Val loss: 0.837239 [t_0.0-0.2=1.0907 t_0.2-0.4=0.9902 t_0.4-0.6=0.8415 t_0.6-0.8=0.6703 t_0.8-1.0=0.6305] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0275 + Deleted old checkpoint: checkpoint_epoch_0272 +[MEM @ epoch 275 end] RAM: 19.0/188.4 GiB (10.1%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 276/499 (55% done) --- + [Epoch 276][10/40] loss=0.802031 avg=0.844300 VRAM=38.9GiB | 55.2% done | ETA(epoch): 690s + [Epoch 276][20/40] loss=0.820949 avg=0.847910 VRAM=38.8GiB | 55.3% done | ETA(epoch): 460s + [Epoch 276][30/40] loss=1.008999 avg=0.859987 VRAM=38.9GiB | 55.4% done | ETA(epoch): 230s + [Epoch 276][40/40] loss=0.797619 avg=0.860482 VRAM=38.8GiB | 55.4% done | ETA(epoch): 0s + Train loss: 0.860482 (921.1s) ETA: 3727min + Val loss: 0.856369 [t_0.0-0.2=1.0723 t_0.2-0.4=1.0169 t_0.4-0.6=0.8497 t_0.6-0.8=0.7075 t_0.8-1.0=0.6368] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0276 + Deleted old checkpoint: checkpoint_epoch_0273 +[MEM @ epoch 276 end] RAM: 18.9/188.4 GiB (10.0%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 277/499 (55% done) --- + [Epoch 277][10/40] loss=0.953713 avg=0.851975 VRAM=38.9GiB | 55.5% done | ETA(epoch): 691s + [Epoch 277][20/40] loss=0.959442 avg=0.861447 VRAM=38.8GiB | 55.5% done | ETA(epoch): 460s + [Epoch 277][30/40] loss=0.891267 avg=0.859139 VRAM=38.9GiB | 55.5% done | ETA(epoch): 230s + [Epoch 277][40/40] loss=0.796673 avg=0.855842 VRAM=38.8GiB | 55.6% done | ETA(epoch): 0s + Train loss: 0.855842 (920.7s) ETA: 3710min + Val loss: 0.849305 [t_0.0-0.2=1.0558 t_0.2-0.4=1.0025 t_0.4-0.6=0.8732 t_0.6-0.8=0.7179 t_0.8-1.0=0.6344] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0277 + Deleted old checkpoint: checkpoint_epoch_0274 +[MEM @ epoch 277 end] RAM: 18.9/188.4 GiB (10.0%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 278/499 (56% done) --- + [Epoch 278][10/40] loss=0.844313 avg=0.841951 VRAM=38.9GiB | 55.6% done | ETA(epoch): 691s + [Epoch 278][20/40] loss=0.769220 avg=0.838531 VRAM=38.8GiB | 55.7% done | ETA(epoch): 460s + [Epoch 278][30/40] loss=0.817710 avg=0.846289 VRAM=38.9GiB | 55.8% done | ETA(epoch): 230s + [Epoch 278][40/40] loss=0.825791 avg=0.852975 VRAM=38.8GiB | 55.8% done | ETA(epoch): 0s + Train loss: 0.852975 (920.6s) ETA: 3693min + Val loss: 0.829909 [t_0.0-0.2=1.0821 t_0.2-0.4=0.9942 t_0.4-0.6=0.8507 t_0.6-0.8=0.7046 t_0.8-1.0=0.6428] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0278 + Deleted old checkpoint: checkpoint_epoch_0275 +[MEM @ epoch 278 end] RAM: 18.9/188.4 GiB (10.0%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 279/499 (56% done) --- + [Epoch 279][10/40] loss=0.769835 avg=0.841799 VRAM=38.9GiB | 55.9% done | ETA(epoch): 691s + [Epoch 279][20/40] loss=0.841511 avg=0.851774 VRAM=38.8GiB | 55.9% done | ETA(epoch): 461s + [Epoch 279][30/40] loss=0.900819 avg=0.853015 VRAM=38.9GiB | 56.0% done | ETA(epoch): 230s + [Epoch 279][40/40] loss=0.811643 avg=0.853382 VRAM=38.8GiB | 56.0% done | ETA(epoch): 0s + Train loss: 0.853382 (921.2s) ETA: 3676min + Val loss: 0.856133 [t_0.0-0.2=1.0678 t_0.2-0.4=0.9632 t_0.4-0.6=0.8533 t_0.6-0.8=0.7188 t_0.8-1.0=0.6343] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0279 + Deleted old checkpoint: checkpoint_epoch_0276 +[MEM @ epoch 279 end] RAM: 18.8/188.4 GiB (10.0%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 280/499 (56% done) --- + [Epoch 280][10/40] loss=0.836590 avg=0.840551 VRAM=38.9GiB | 56.0% done | ETA(epoch): 690s + [Epoch 280][20/40] loss=0.872907 avg=0.844495 VRAM=38.8GiB | 56.1% done | ETA(epoch): 460s + [Epoch 280][30/40] loss=0.911418 avg=0.843138 VRAM=38.9GiB | 56.1% done | ETA(epoch): 230s + [Epoch 280][40/40] loss=0.933127 avg=0.853309 VRAM=38.8GiB | 56.2% done | ETA(epoch): 0s + Train loss: 0.853309 (920.8s) ETA: 3659min + Val loss: 0.874129 [t_0.0-0.2=1.0727 t_0.2-0.4=0.9907 t_0.4-0.6=0.7968 t_0.6-0.8=0.7298 t_0.8-1.0=0.6418] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0280 + Deleted old checkpoint: checkpoint_epoch_0277 +[MEM @ epoch 280 end] RAM: 18.9/188.4 GiB (10.1%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 281/499 (56% done) --- + [Epoch 281][10/40] loss=0.882947 avg=0.887942 VRAM=38.9GiB | 56.2% done | ETA(epoch): 690s + [Epoch 281][20/40] loss=0.870086 avg=0.876866 VRAM=38.8GiB | 56.3% done | ETA(epoch): 460s + [Epoch 281][30/40] loss=0.900852 avg=0.872929 VRAM=38.9GiB | 56.4% done | ETA(epoch): 230s + [Epoch 281][40/40] loss=0.847308 avg=0.868365 VRAM=38.8GiB | 56.4% done | ETA(epoch): 0s + Train loss: 0.868365 (921.0s) ETA: 3642min + Val loss: 0.852378 [t_0.0-0.2=1.0788 t_0.2-0.4=1.0048 t_0.4-0.6=0.8268 t_0.6-0.8=0.7026 t_0.8-1.0=0.6545] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0281 + Deleted old checkpoint: checkpoint_epoch_0278 +[MEM @ epoch 281 end] RAM: 18.9/188.4 GiB (10.0%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 282/499 (56% done) --- + [Epoch 282][10/40] loss=0.816177 avg=0.875634 VRAM=38.9GiB | 56.5% done | ETA(epoch): 691s + [Epoch 282][20/40] loss=0.767217 avg=0.866097 VRAM=38.8GiB | 56.5% done | ETA(epoch): 461s + [Epoch 282][30/40] loss=0.806098 avg=0.871834 VRAM=38.9GiB | 56.5% done | ETA(epoch): 230s + [Epoch 282][40/40] loss=0.856434 avg=0.865594 VRAM=38.8GiB | 56.6% done | ETA(epoch): 0s + Train loss: 0.865594 (921.2s) ETA: 3625min + Val loss: 0.836162 [t_0.0-0.2=1.0827 t_0.2-0.4=1.0101 t_0.4-0.6=0.8503 t_0.6-0.8=0.6914 t_0.8-1.0=0.6226] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0282 + Deleted old checkpoint: checkpoint_epoch_0279 +[MEM @ epoch 282 end] RAM: 19.0/188.4 GiB (10.1%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 283/499 (57% done) --- + [Epoch 283][10/40] loss=0.851666 avg=0.873400 VRAM=38.9GiB | 56.6% done | ETA(epoch): 690s + [Epoch 283][20/40] loss=0.931510 avg=0.872787 VRAM=38.8GiB | 56.7% done | ETA(epoch): 460s + [Epoch 283][30/40] loss=0.932495 avg=0.870318 VRAM=38.9GiB | 56.8% done | ETA(epoch): 230s + [Epoch 283][40/40] loss=0.819316 avg=0.869982 VRAM=38.8GiB | 56.8% done | ETA(epoch): 0s + Train loss: 0.869982 (920.8s) ETA: 3608min + Val loss: 0.839834 [t_0.0-0.2=1.0795 t_0.2-0.4=0.9656 t_0.4-0.6=0.8627 t_0.6-0.8=0.7003 t_0.8-1.0=0.6220] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0283 + Deleted old checkpoint: checkpoint_epoch_0280 +[MEM @ epoch 283 end] RAM: 18.8/188.4 GiB (10.0%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 284/499 (57% done) --- + [Epoch 284][10/40] loss=0.813042 avg=0.867064 VRAM=38.9GiB | 56.9% done | ETA(epoch): 690s + [Epoch 284][20/40] loss=0.871189 avg=0.865339 VRAM=38.8GiB | 56.9% done | ETA(epoch): 460s + [Epoch 284][30/40] loss=0.846747 avg=0.851597 VRAM=38.9GiB | 57.0% done | ETA(epoch): 230s + [Epoch 284][40/40] loss=0.907829 avg=0.851713 VRAM=38.8GiB | 57.0% done | ETA(epoch): 0s + Train loss: 0.851713 (920.5s) ETA: 3591min + Val loss: 0.884990 [t_0.0-0.2=1.0824 t_0.2-0.4=0.9994 t_0.4-0.6=0.8455 t_0.6-0.8=0.6955 t_0.8-1.0=0.6362] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0284 + Deleted old checkpoint: checkpoint_epoch_0281 +[MEM @ epoch 284 end] RAM: 18.9/188.4 GiB (10.0%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 285/499 (57% done) --- + [Epoch 285][10/40] loss=0.849345 avg=0.883769 VRAM=38.9GiB | 57.0% done | ETA(epoch): 690s + [Epoch 285][20/40] loss=0.824511 avg=0.860856 VRAM=38.8GiB | 57.1% done | ETA(epoch): 460s + [Epoch 285][30/40] loss=0.818177 avg=0.863361 VRAM=38.9GiB | 57.1% done | ETA(epoch): 230s + [Epoch 285][40/40] loss=0.975794 avg=0.870254 VRAM=38.8GiB | 57.2% done | ETA(epoch): 0s + Train loss: 0.870254 (920.7s) ETA: 3574min + Val loss: 0.852461 [t_0.0-0.2=1.0835 t_0.2-0.4=1.0122 t_0.4-0.6=0.8293 t_0.6-0.8=0.7008 t_0.8-1.0=0.6309] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0285 + Deleted old checkpoint: checkpoint_epoch_0282 +[MEM @ epoch 285 end] RAM: 18.9/188.4 GiB (10.0%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 286/499 (57% done) --- + [Epoch 286][10/40] loss=0.803103 avg=0.802501 VRAM=38.9GiB | 57.2% done | ETA(epoch): 690s + [Epoch 286][20/40] loss=0.907817 avg=0.830592 VRAM=38.8GiB | 57.3% done | ETA(epoch): 460s + [Epoch 286][30/40] loss=0.767943 avg=0.829502 VRAM=38.9GiB | 57.4% done | ETA(epoch): 230s + [Epoch 286][40/40] loss=0.909910 avg=0.841161 VRAM=38.8GiB | 57.4% done | ETA(epoch): 0s + Train loss: 0.841161 (920.7s) ETA: 3558min + Val loss: 0.826641 [t_0.0-0.2=1.0591 t_0.2-0.4=1.0067 t_0.4-0.6=0.8416 t_0.6-0.8=0.7129 t_0.8-1.0=0.6549] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0286 + Deleted old checkpoint: checkpoint_epoch_0283 +[MEM @ epoch 286 end] RAM: 18.8/188.4 GiB (10.0%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 287/499 (57% done) --- + [Epoch 287][10/40] loss=0.847727 avg=0.870649 VRAM=38.9GiB | 57.5% done | ETA(epoch): 691s + [Epoch 287][20/40] loss=0.748152 avg=0.851663 VRAM=38.8GiB | 57.5% done | ETA(epoch): 461s + [Epoch 287][30/40] loss=0.826128 avg=0.848725 VRAM=38.9GiB | 57.6% done | ETA(epoch): 230s + [Epoch 287][40/40] loss=0.838483 avg=0.842589 VRAM=38.8GiB | 57.6% done | ETA(epoch): 0s + Train loss: 0.842589 (920.7s) ETA: 3541min + Val loss: 0.845944 [t_0.0-0.2=1.0731 t_0.2-0.4=1.0127 t_0.4-0.6=0.8683 t_0.6-0.8=0.6975 t_0.8-1.0=0.6311] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0287 + Deleted old checkpoint: checkpoint_epoch_0284 +[MEM @ epoch 287 end] RAM: 18.9/188.4 GiB (10.0%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 288/499 (58% done) --- + [Epoch 288][10/40] loss=0.844611 avg=0.845302 VRAM=38.9GiB | 57.6% done | ETA(epoch): 690s + [Epoch 288][20/40] loss=0.865199 avg=0.839814 VRAM=38.8GiB | 57.7% done | ETA(epoch): 460s + [Epoch 288][30/40] loss=0.908830 avg=0.844253 VRAM=38.9GiB | 57.8% done | ETA(epoch): 230s + [Epoch 288][40/40] loss=0.800585 avg=0.843892 VRAM=38.8GiB | 57.8% done | ETA(epoch): 0s + Train loss: 0.843892 (920.5s) ETA: 3524min + Val loss: 0.892038 [t_0.0-0.2=1.0746 t_0.2-0.4=0.9730 t_0.4-0.6=0.8514 t_0.6-0.8=0.7264 t_0.8-1.0=0.6329] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0288 + Deleted old checkpoint: checkpoint_epoch_0285 +[MEM @ epoch 288 end] RAM: 18.9/188.4 GiB (10.0%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 289/499 (58% done) --- + [Epoch 289][10/40] loss=0.845247 avg=0.884770 VRAM=38.9GiB | 57.9% done | ETA(epoch): 690s + [Epoch 289][20/40] loss=0.846521 avg=0.862621 VRAM=38.8GiB | 57.9% done | ETA(epoch): 460s + [Epoch 289][30/40] loss=0.968126 avg=0.865162 VRAM=38.9GiB | 58.0% done | ETA(epoch): 230s + [Epoch 289][40/40] loss=0.784278 avg=0.858539 VRAM=38.8GiB | 58.0% done | ETA(epoch): 0s + Train loss: 0.858539 (920.6s) ETA: 3507min + Val loss: 0.822258 [t_0.0-0.2=1.0763 t_0.2-0.4=0.9624 t_0.4-0.6=0.8855 t_0.6-0.8=0.6982 t_0.8-1.0=0.6322] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0289 + Deleted old checkpoint: checkpoint_epoch_0286 +[MEM @ epoch 289 end] RAM: 18.9/188.4 GiB (10.0%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 290/499 (58% done) --- + [Epoch 290][10/40] loss=0.836380 avg=0.830036 VRAM=38.9GiB | 58.1% done | ETA(epoch): 691s + [Epoch 290][20/40] loss=0.810249 avg=0.845819 VRAM=38.8GiB | 58.1% done | ETA(epoch): 460s + [Epoch 290][30/40] loss=0.929849 avg=0.857344 VRAM=38.9GiB | 58.1% done | ETA(epoch): 230s + [Epoch 290][40/40] loss=0.824771 avg=0.865020 VRAM=38.8GiB | 58.2% done | ETA(epoch): 0s + Train loss: 0.865020 (920.5s) ETA: 3490min + Val loss: 0.853973 [t_0.0-0.2=1.0675 t_0.2-0.4=0.9862 t_0.4-0.6=0.8730 t_0.6-0.8=0.7086 t_0.8-1.0=0.6265] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0290 + Deleted old checkpoint: checkpoint_epoch_0287 +[MEM @ epoch 290 end] RAM: 18.9/188.4 GiB (10.0%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 291/499 (58% done) --- + [Epoch 291][10/40] loss=0.944900 avg=0.875185 VRAM=38.9GiB | 58.2% done | ETA(epoch): 691s + [Epoch 291][20/40] loss=0.815760 avg=0.856319 VRAM=38.8GiB | 58.3% done | ETA(epoch): 461s + [Epoch 291][30/40] loss=0.853277 avg=0.856644 VRAM=38.9GiB | 58.4% done | ETA(epoch): 230s + [Epoch 291][40/40] loss=0.857290 avg=0.859974 VRAM=38.8GiB | 58.4% done | ETA(epoch): 0s + Train loss: 0.859974 (921.4s) ETA: 3473min + Val loss: 0.835639 [t_0.0-0.2=1.0749 t_0.2-0.4=1.0002 t_0.4-0.6=0.8145 t_0.6-0.8=0.7025 t_0.8-1.0=0.6405] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0291 + Deleted old checkpoint: checkpoint_epoch_0288 +[MEM @ epoch 291 end] RAM: 18.9/188.4 GiB (10.0%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 292/499 (58% done) --- + [Epoch 292][10/40] loss=0.770648 avg=0.829706 VRAM=38.9GiB | 58.5% done | ETA(epoch): 690s + [Epoch 292][20/40] loss=0.852325 avg=0.834461 VRAM=38.8GiB | 58.5% done | ETA(epoch): 460s + [Epoch 292][30/40] loss=0.816216 avg=0.845188 VRAM=38.9GiB | 58.6% done | ETA(epoch): 230s + [Epoch 292][40/40] loss=0.887810 avg=0.851154 VRAM=38.8GiB | 58.6% done | ETA(epoch): 0s + Train loss: 0.851154 (920.8s) ETA: 3456min + Val loss: 0.835347 [t_0.0-0.2=1.0662 t_0.2-0.4=1.0121 t_0.4-0.6=0.8448 t_0.6-0.8=0.7181 t_0.8-1.0=0.6339] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0292 + Deleted old checkpoint: checkpoint_epoch_0289 +[MEM @ epoch 292 end] RAM: 18.9/188.4 GiB (10.0%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 293/499 (59% done) --- + [Epoch 293][10/40] loss=0.894422 avg=0.883049 VRAM=38.9GiB | 58.7% done | ETA(epoch): 690s + [Epoch 293][20/40] loss=0.847464 avg=0.875448 VRAM=38.8GiB | 58.7% done | ETA(epoch): 460s + [Epoch 293][30/40] loss=0.901913 avg=0.860108 VRAM=38.9GiB | 58.8% done | ETA(epoch): 230s + [Epoch 293][40/40] loss=0.900573 avg=0.856629 VRAM=38.8GiB | 58.8% done | ETA(epoch): 0s + Train loss: 0.856629 (920.6s) ETA: 3439min + Val loss: 0.853394 [t_0.0-0.2=1.0679 t_0.2-0.4=0.9833 t_0.4-0.6=0.8732 t_0.6-0.8=0.7011 t_0.8-1.0=0.6314] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0293 + Deleted old checkpoint: checkpoint_epoch_0290 +[MEM @ epoch 293 end] RAM: 18.8/188.4 GiB (10.0%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 294/499 (59% done) --- + [Epoch 294][10/40] loss=0.786805 avg=0.861020 VRAM=38.9GiB | 58.9% done | ETA(epoch): 690s + [Epoch 294][20/40] loss=0.905941 avg=0.869969 VRAM=38.8GiB | 58.9% done | ETA(epoch): 460s + [Epoch 294][30/40] loss=0.909057 avg=0.861453 VRAM=38.9GiB | 59.0% done | ETA(epoch): 230s + [Epoch 294][40/40] loss=0.937140 avg=0.861683 VRAM=38.8GiB | 59.0% done | ETA(epoch): 0s + Train loss: 0.861683 (920.3s) ETA: 3422min + Val loss: 0.813783 [t_0.0-0.2=1.0856 t_0.2-0.4=0.9839 t_0.4-0.6=0.8556 t_0.6-0.8=0.7118 t_0.8-1.0=0.6313] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0294 + Deleted old checkpoint: checkpoint_epoch_0291 +[MEM @ epoch 294 end] RAM: 18.9/188.4 GiB (10.0%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 295/499 (59% done) --- + [Epoch 295][10/40] loss=0.848801 avg=0.846162 VRAM=38.9GiB | 59.1% done | ETA(epoch): 690s + [Epoch 295][20/40] loss=0.744760 avg=0.845516 VRAM=38.8GiB | 59.1% done | ETA(epoch): 460s + [Epoch 295][30/40] loss=0.812991 avg=0.840873 VRAM=38.9GiB | 59.2% done | ETA(epoch): 230s + [Epoch 295][40/40] loss=0.844728 avg=0.847596 VRAM=38.8GiB | 59.2% done | ETA(epoch): 0s + Train loss: 0.847596 (920.7s) ETA: 3406min + Val loss: 0.844520 [t_0.0-0.2=1.0641 t_0.2-0.4=0.9767 t_0.4-0.6=0.8557 t_0.6-0.8=0.7090 t_0.8-1.0=0.6271] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0295 + Deleted old checkpoint: checkpoint_epoch_0292 +[MEM @ epoch 295 end] RAM: 18.9/188.4 GiB (10.0%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 296/499 (59% done) --- + [Epoch 296][10/40] loss=0.892428 avg=0.891187 VRAM=38.9GiB | 59.2% done | ETA(epoch): 690s + [Epoch 296][20/40] loss=0.827378 avg=0.890426 VRAM=38.8GiB | 59.3% done | ETA(epoch): 460s + [Epoch 296][30/40] loss=0.867073 avg=0.866631 VRAM=38.9GiB | 59.4% done | ETA(epoch): 230s + [Epoch 296][40/40] loss=0.872081 avg=0.865187 VRAM=38.8GiB | 59.4% done | ETA(epoch): 0s + Train loss: 0.865187 (920.8s) ETA: 3389min + Val loss: 0.836447 [t_0.0-0.2=1.0823 t_0.2-0.4=1.0102 t_0.4-0.6=0.8264 t_0.6-0.8=0.7107 t_0.8-1.0=0.6111] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0296 + Deleted old checkpoint: checkpoint_epoch_0293 +[MEM @ epoch 296 end] RAM: 18.9/188.4 GiB (10.0%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 297/499 (59% done) --- + [Epoch 297][10/40] loss=0.896151 avg=0.847661 VRAM=38.9GiB | 59.5% done | ETA(epoch): 690s + [Epoch 297][20/40] loss=0.937860 avg=0.839238 VRAM=38.8GiB | 59.5% done | ETA(epoch): 460s + [Epoch 297][30/40] loss=0.908101 avg=0.841512 VRAM=38.9GiB | 59.6% done | ETA(epoch): 230s + [Epoch 297][40/40] loss=0.916556 avg=0.846763 VRAM=38.8GiB | 59.6% done | ETA(epoch): 0s + Train loss: 0.846763 (920.4s) ETA: 3372min + Val loss: 0.860448 [t_0.0-0.2=1.0656 t_0.2-0.4=1.0098 t_0.4-0.6=0.8696 t_0.6-0.8=0.6919 t_0.8-1.0=0.6416] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0297 + Deleted old checkpoint: checkpoint_epoch_0294 +[MEM @ epoch 297 end] RAM: 18.9/188.4 GiB (10.0%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 298/499 (60% done) --- + [Epoch 298][10/40] loss=0.904500 avg=0.860765 VRAM=38.9GiB | 59.7% done | ETA(epoch): 690s + [Epoch 298][20/40] loss=0.895341 avg=0.859872 VRAM=38.8GiB | 59.7% done | ETA(epoch): 460s + [Epoch 298][30/40] loss=0.741991 avg=0.859406 VRAM=38.9GiB | 59.8% done | ETA(epoch): 230s + [Epoch 298][40/40] loss=0.936153 avg=0.860717 VRAM=38.8GiB | 59.8% done | ETA(epoch): 0s + Train loss: 0.860717 (921.0s) ETA: 3355min + Val loss: 0.830329 [t_0.0-0.2=1.0825 t_0.2-0.4=0.9849 t_0.4-0.6=0.8460 t_0.6-0.8=0.7025 t_0.8-1.0=0.6286] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0298 + Deleted old checkpoint: checkpoint_epoch_0295 +[MEM @ epoch 298 end] RAM: 18.9/188.4 GiB (10.0%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 299/499 (60% done) --- + [Epoch 299][10/40] loss=0.936270 avg=0.855029 VRAM=38.9GiB | 59.9% done | ETA(epoch): 690s + [Epoch 299][20/40] loss=0.877774 avg=0.859562 VRAM=38.8GiB | 59.9% done | ETA(epoch): 460s + [Epoch 299][30/40] loss=0.826745 avg=0.853584 VRAM=38.9GiB | 60.0% done | ETA(epoch): 230s + [Epoch 299][40/40] loss=0.856579 avg=0.851577 VRAM=38.8GiB | 60.0% done | ETA(epoch): 0s + [MilestoneVis] train_0 step 11999 ✓ + [MilestoneGrid] train_0 → output/lora_rank_128_mlp_H200/milestone_vis/milestone_grid_train_0.png + [MilestoneVis] train_1 step 11999 ✓ + [MilestoneGrid] train_1 → output/lora_rank_128_mlp_H200/milestone_vis/milestone_grid_train_1.png + [MilestoneVis] val_0 step 11999 ✓ + [MilestoneGrid] val_0 → output/lora_rank_128_mlp_H200/milestone_vis/milestone_grid_val_0.png + [MilestoneVis] val_1 step 11999 ✓ + [MilestoneGrid] val_1 → output/lora_rank_128_mlp_H200/milestone_vis/milestone_grid_val_1.png + [MilestoneVis] test_0 step 11999 ✓ + [MilestoneGrid] test_0 → output/lora_rank_128_mlp_H200/milestone_vis/milestone_grid_test_0.png + [MilestoneVis] test_1 step 11999 ✓ + [MilestoneGrid] test_1 → output/lora_rank_128_mlp_H200/milestone_vis/milestone_grid_test_1.png + [MilestoneVis] test_2 step 11999 ✓ + [MilestoneGrid] test_2 → output/lora_rank_128_mlp_H200/milestone_vis/milestone_grid_test_2.png + [MilestoneVis] test_3 step 11999 ✓ + [MilestoneGrid] test_3 → output/lora_rank_128_mlp_H200/milestone_vis/milestone_grid_test_3.png + [MilestoneVis] test_4 step 11999 ✓ + [MilestoneGrid] test_4 → output/lora_rank_128_mlp_H200/milestone_vis/milestone_grid_test_4.png + [MilestoneVis] test_5 step 11999 ✓ + [MilestoneGrid] test_5 → output/lora_rank_128_mlp_H200/milestone_vis/milestone_grid_test_5.png + Train loss: 0.851577 (1673.8s) ETA: 3347min + Val loss: 0.826701 [t_0.0-0.2=1.0726 t_0.2-0.4=1.0395 t_0.4-0.6=0.8284 t_0.6-0.8=0.7187 t_0.8-1.0=0.6286] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0299 + Deleted old checkpoint: checkpoint_epoch_0296 +[MEM @ epoch 299 end] RAM: 19.6/188.4 GiB (10.4%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 300/499 (60% done) --- + [Epoch 300][10/40] loss=0.769285 avg=0.877111 VRAM=38.9GiB | 60.1% done | ETA(epoch): 690s + [Epoch 300][20/40] loss=0.804296 avg=0.867284 VRAM=38.8GiB | 60.1% done | ETA(epoch): 460s + [Epoch 300][30/40] loss=0.872723 avg=0.862259 VRAM=38.9GiB | 60.2% done | ETA(epoch): 230s + [Epoch 300][40/40] loss=0.953479 avg=0.859109 VRAM=38.8GiB | 60.2% done | ETA(epoch): 0s + Train loss: 0.859109 (920.7s) ETA: 3330min + Val loss: 0.863237 [t_0.0-0.2=1.0779 t_0.2-0.4=0.9954 t_0.4-0.6=0.8111 t_0.6-0.8=0.7286 t_0.8-1.0=0.6324] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0300 + Deleted old checkpoint: checkpoint_epoch_0297 +[MEM @ epoch 300 end] RAM: 19.5/188.4 GiB (10.3%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 301/499 (60% done) --- + [Epoch 301][10/40] loss=0.866202 avg=0.835786 VRAM=38.9GiB | 60.2% done | ETA(epoch): 690s + [Epoch 301][20/40] loss=0.840708 avg=0.833005 VRAM=38.8GiB | 60.3% done | ETA(epoch): 460s + [Epoch 301][30/40] loss=0.876005 avg=0.839418 VRAM=38.9GiB | 60.4% done | ETA(epoch): 230s + [Epoch 301][40/40] loss=0.805290 avg=0.845764 VRAM=38.8GiB | 60.4% done | ETA(epoch): 0s + Train loss: 0.845764 (920.4s) ETA: 3313min + Val loss: 0.871348 [t_0.0-0.2=1.0679 t_0.2-0.4=0.9679 t_0.4-0.6=0.8461 t_0.6-0.8=0.6883 t_0.8-1.0=0.6317] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0301 + Deleted old checkpoint: checkpoint_epoch_0298 +[MEM @ epoch 301 end] RAM: 19.5/188.4 GiB (10.4%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 302/499 (60% done) --- + [Epoch 302][10/40] loss=0.860797 avg=0.827331 VRAM=38.9GiB | 60.5% done | ETA(epoch): 690s + [Epoch 302][20/40] loss=0.811240 avg=0.833226 VRAM=38.8GiB | 60.5% done | ETA(epoch): 460s + [Epoch 302][30/40] loss=0.890717 avg=0.839758 VRAM=38.9GiB | 60.6% done | ETA(epoch): 230s + [Epoch 302][40/40] loss=0.844632 avg=0.842611 VRAM=38.8GiB | 60.6% done | ETA(epoch): 0s + Train loss: 0.842611 (920.6s) ETA: 3296min + Val loss: 0.840269 [t_0.0-0.2=1.0770 t_0.2-0.4=0.9939 t_0.4-0.6=0.8448 t_0.6-0.8=0.7011 t_0.8-1.0=0.6302] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0302 + Deleted old checkpoint: checkpoint_epoch_0299 +[MEM @ epoch 302 end] RAM: 19.5/188.4 GiB (10.3%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 303/499 (61% done) --- + [Epoch 303][10/40] loss=0.819647 avg=0.835862 VRAM=38.9GiB | 60.7% done | ETA(epoch): 690s + [Epoch 303][20/40] loss=0.889681 avg=0.851150 VRAM=38.8GiB | 60.7% done | ETA(epoch): 460s + [Epoch 303][30/40] loss=0.783820 avg=0.857548 VRAM=38.9GiB | 60.8% done | ETA(epoch): 230s + [Epoch 303][40/40] loss=0.907633 avg=0.857198 VRAM=38.8GiB | 60.8% done | ETA(epoch): 0s + Train loss: 0.857198 (920.9s) ETA: 3279min + Val loss: 0.884671 [t_0.0-0.2=1.0737 t_0.2-0.4=0.9863 t_0.4-0.6=0.8727 t_0.6-0.8=0.6973 t_0.8-1.0=0.6445] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0303 + Deleted old checkpoint: checkpoint_epoch_0300 +[MEM @ epoch 303 end] RAM: 19.4/188.4 GiB (10.3%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 304/499 (61% done) --- + [Epoch 304][10/40] loss=0.815156 avg=0.845621 VRAM=38.9GiB | 60.9% done | ETA(epoch): 691s + [Epoch 304][20/40] loss=0.837439 avg=0.845568 VRAM=38.8GiB | 60.9% done | ETA(epoch): 460s + [Epoch 304][30/40] loss=0.844938 avg=0.846536 VRAM=38.9GiB | 61.0% done | ETA(epoch): 230s + [Epoch 304][40/40] loss=0.897790 avg=0.856936 VRAM=38.8GiB | 61.0% done | ETA(epoch): 0s + Train loss: 0.856936 (920.9s) ETA: 3262min + Val loss: 0.857887 [t_0.0-0.2=1.0912 t_0.2-0.4=0.9698 t_0.4-0.6=0.8796 t_0.6-0.8=0.7120 t_0.8-1.0=0.6160] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0304 + Deleted old checkpoint: checkpoint_epoch_0301 +[MEM @ epoch 304 end] RAM: 19.5/188.4 GiB (10.4%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 305/499 (61% done) --- + [Epoch 305][10/40] loss=0.850125 avg=0.840801 VRAM=38.9GiB | 61.1% done | ETA(epoch): 690s + [Epoch 305][20/40] loss=0.854594 avg=0.842540 VRAM=38.8GiB | 61.1% done | ETA(epoch): 460s + [Epoch 305][30/40] loss=0.809267 avg=0.855312 VRAM=38.9GiB | 61.2% done | ETA(epoch): 230s + [Epoch 305][40/40] loss=0.845144 avg=0.850781 VRAM=38.8GiB | 61.2% done | ETA(epoch): 0s + Train loss: 0.850781 (920.6s) ETA: 3245min + Val loss: 0.883752 [t_0.0-0.2=1.0763 t_0.2-0.4=1.0166 t_0.4-0.6=0.8105 t_0.6-0.8=0.7257 t_0.8-1.0=0.6442] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0305 + Deleted old checkpoint: checkpoint_epoch_0302 +[MEM @ epoch 305 end] RAM: 19.6/188.4 GiB (10.4%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 306/499 (61% done) --- + [Epoch 306][10/40] loss=0.979638 avg=0.853668 VRAM=38.9GiB | 61.3% done | ETA(epoch): 690s + [Epoch 306][20/40] loss=0.808847 avg=0.853857 VRAM=38.8GiB | 61.3% done | ETA(epoch): 460s + [Epoch 306][30/40] loss=0.938590 avg=0.854062 VRAM=38.9GiB | 61.4% done | ETA(epoch): 230s + [Epoch 306][40/40] loss=0.924214 avg=0.853008 VRAM=38.8GiB | 61.4% done | ETA(epoch): 0s + Train loss: 0.853008 (920.7s) ETA: 3228min + Val loss: 0.853897 [t_0.0-0.2=1.0672 t_0.2-0.4=1.0046 t_0.4-0.6=0.8364 t_0.6-0.8=0.7139 t_0.8-1.0=0.6389] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0306 + Deleted old checkpoint: checkpoint_epoch_0303 +[MEM @ epoch 306 end] RAM: 19.5/188.4 GiB (10.3%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 307/499 (61% done) --- + [Epoch 307][10/40] loss=0.877975 avg=0.883332 VRAM=38.9GiB | 61.5% done | ETA(epoch): 690s + [Epoch 307][20/40] loss=0.929254 avg=0.874928 VRAM=38.8GiB | 61.5% done | ETA(epoch): 460s + [Epoch 307][30/40] loss=0.751914 avg=0.853021 VRAM=38.9GiB | 61.6% done | ETA(epoch): 230s + [Epoch 307][40/40] loss=0.880325 avg=0.849159 VRAM=38.8GiB | 61.6% done | ETA(epoch): 0s + Train loss: 0.849159 (920.0s) ETA: 3211min + Val loss: 0.851860 [t_0.0-0.2=1.0786 t_0.2-0.4=1.0014 t_0.4-0.6=0.8219 t_0.6-0.8=0.7233 t_0.8-1.0=0.6310] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0307 + Deleted old checkpoint: checkpoint_epoch_0304 +[MEM @ epoch 307 end] RAM: 19.5/188.4 GiB (10.3%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 308/499 (62% done) --- + [Epoch 308][10/40] loss=0.905189 avg=0.845547 VRAM=38.9GiB | 61.7% done | ETA(epoch): 690s + [Epoch 308][20/40] loss=0.821187 avg=0.840382 VRAM=38.8GiB | 61.7% done | ETA(epoch): 460s + [Epoch 308][30/40] loss=0.859151 avg=0.843828 VRAM=38.9GiB | 61.8% done | ETA(epoch): 230s + [Epoch 308][40/40] loss=0.941155 avg=0.856637 VRAM=38.8GiB | 61.8% done | ETA(epoch): 0s + Train loss: 0.856637 (920.4s) ETA: 3194min + Val loss: 0.844515 [t_0.0-0.2=1.0676 t_0.2-0.4=1.0092 t_0.4-0.6=0.8480 t_0.6-0.8=0.7212 t_0.8-1.0=0.6398] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0308 + Deleted old checkpoint: checkpoint_epoch_0305 +[MEM @ epoch 308 end] RAM: 19.5/188.4 GiB (10.3%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 309/499 (62% done) --- + [Epoch 309][10/40] loss=0.963074 avg=0.879339 VRAM=38.9GiB | 61.9% done | ETA(epoch): 690s + [Epoch 309][20/40] loss=0.827064 avg=0.864071 VRAM=38.8GiB | 61.9% done | ETA(epoch): 460s + [Epoch 309][30/40] loss=0.852371 avg=0.864308 VRAM=38.9GiB | 62.0% done | ETA(epoch): 230s + [Epoch 309][40/40] loss=0.823833 avg=0.862120 VRAM=38.8GiB | 62.0% done | ETA(epoch): 0s + Train loss: 0.862120 (920.1s) ETA: 3177min + Val loss: 0.849973 [t_0.0-0.2=1.0760 t_0.2-0.4=0.9869 t_0.4-0.6=0.8642 t_0.6-0.8=0.7111 t_0.8-1.0=0.6301] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0309 + Deleted old checkpoint: checkpoint_epoch_0306 +[MEM @ epoch 309 end] RAM: 19.6/188.4 GiB (10.4%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 310/499 (62% done) --- + [Epoch 310][10/40] loss=0.785320 avg=0.853061 VRAM=38.9GiB | 62.1% done | ETA(epoch): 690s + [Epoch 310][20/40] loss=0.749638 avg=0.864249 VRAM=38.8GiB | 62.1% done | ETA(epoch): 460s + [Epoch 310][30/40] loss=0.960007 avg=0.859456 VRAM=38.9GiB | 62.2% done | ETA(epoch): 230s + [Epoch 310][40/40] loss=0.814081 avg=0.860181 VRAM=38.8GiB | 62.2% done | ETA(epoch): 0s + Train loss: 0.860181 (920.2s) ETA: 3160min + Val loss: 0.854262 [t_0.0-0.2=1.0682 t_0.2-0.4=1.0044 t_0.4-0.6=0.8366 t_0.6-0.8=0.6925 t_0.8-1.0=0.6425] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0310 + Deleted old checkpoint: checkpoint_epoch_0307 +[MEM @ epoch 310 end] RAM: 19.5/188.4 GiB (10.3%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 311/499 (62% done) --- + [Epoch 311][10/40] loss=0.873614 avg=0.885593 VRAM=38.9GiB | 62.3% done | ETA(epoch): 690s + [Epoch 311][20/40] loss=0.855016 avg=0.876885 VRAM=38.8GiB | 62.3% done | ETA(epoch): 460s + [Epoch 311][30/40] loss=0.709895 avg=0.867159 VRAM=38.9GiB | 62.4% done | ETA(epoch): 230s + [Epoch 311][40/40] loss=0.781954 avg=0.861390 VRAM=38.8GiB | 62.4% done | ETA(epoch): 0s + Train loss: 0.861390 (919.7s) ETA: 3144min + Val loss: 0.826744 [t_0.0-0.2=1.0698 t_0.2-0.4=0.9783 t_0.4-0.6=0.8750 t_0.6-0.8=0.7033 t_0.8-1.0=0.6305] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0311 + Deleted old checkpoint: checkpoint_epoch_0308 +[MEM @ epoch 311 end] RAM: 19.4/188.4 GiB (10.3%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 312/499 (62% done) --- + [Epoch 312][10/40] loss=0.915273 avg=0.880004 VRAM=38.9GiB | 62.5% done | ETA(epoch): 690s + [Epoch 312][20/40] loss=0.782388 avg=0.864639 VRAM=38.8GiB | 62.5% done | ETA(epoch): 460s + [Epoch 312][30/40] loss=0.885170 avg=0.860204 VRAM=38.9GiB | 62.5% done | ETA(epoch): 230s + [Epoch 312][40/40] loss=0.858439 avg=0.861833 VRAM=38.8GiB | 62.6% done | ETA(epoch): 0s + Train loss: 0.861833 (920.0s) ETA: 3127min + Val loss: 0.849725 [t_0.0-0.2=1.0772 t_0.2-0.4=0.9927 t_0.4-0.6=0.8602 t_0.6-0.8=0.7023 t_0.8-1.0=0.6373] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0312 + Deleted old checkpoint: checkpoint_epoch_0309 +[MEM @ epoch 312 end] RAM: 19.5/188.4 GiB (10.3%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 313/499 (63% done) --- + [Epoch 313][10/40] loss=0.931469 avg=0.868416 VRAM=38.9GiB | 62.6% done | ETA(epoch): 690s + [Epoch 313][20/40] loss=0.957283 avg=0.879421 VRAM=38.8GiB | 62.7% done | ETA(epoch): 460s + [Epoch 313][30/40] loss=0.801081 avg=0.869881 VRAM=38.9GiB | 62.7% done | ETA(epoch): 230s + [Epoch 313][40/40] loss=0.789720 avg=0.860538 VRAM=38.8GiB | 62.8% done | ETA(epoch): 0s + Train loss: 0.860538 (919.8s) ETA: 3110min + Val loss: 0.884550 [t_0.0-0.2=1.0796 t_0.2-0.4=1.0140 t_0.4-0.6=0.8158 t_0.6-0.8=0.7124 t_0.8-1.0=0.6563] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0313 + Deleted old checkpoint: checkpoint_epoch_0310 +[MEM @ epoch 313 end] RAM: 19.6/188.4 GiB (10.4%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 314/499 (63% done) --- + [Epoch 314][10/40] loss=0.752163 avg=0.861651 VRAM=38.9GiB | 62.8% done | ETA(epoch): 690s + [Epoch 314][20/40] loss=0.862679 avg=0.848416 VRAM=38.8GiB | 62.9% done | ETA(epoch): 460s + [Epoch 314][30/40] loss=0.796379 avg=0.840380 VRAM=38.9GiB | 62.9% done | ETA(epoch): 230s + [Epoch 314][40/40] loss=0.881729 avg=0.841938 VRAM=38.8GiB | 63.0% done | ETA(epoch): 0s + Train loss: 0.841938 (919.8s) ETA: 3093min + Val loss: 0.845476 [t_0.0-0.2=1.0825 t_0.2-0.4=0.9780 t_0.4-0.6=0.8575 t_0.6-0.8=0.7269 t_0.8-1.0=0.6276] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0314 + Deleted old checkpoint: checkpoint_epoch_0311 +[MEM @ epoch 314 end] RAM: 19.5/188.4 GiB (10.3%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 315/499 (63% done) --- + [Epoch 315][10/40] loss=0.890471 avg=0.870215 VRAM=38.9GiB | 63.0% done | ETA(epoch): 689s + [Epoch 315][20/40] loss=0.864660 avg=0.863126 VRAM=38.8GiB | 63.1% done | ETA(epoch): 460s + [Epoch 315][30/40] loss=0.938276 avg=0.852953 VRAM=38.9GiB | 63.1% done | ETA(epoch): 230s + [Epoch 315][40/40] loss=0.788673 avg=0.852459 VRAM=38.8GiB | 63.2% done | ETA(epoch): 0s + Train loss: 0.852459 (919.5s) ETA: 3076min + Val loss: 0.870432 [t_0.0-0.2=1.0666 t_0.2-0.4=0.9953 t_0.4-0.6=0.8180 t_0.6-0.8=0.7112 t_0.8-1.0=0.6247] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0315 + Deleted old checkpoint: checkpoint_epoch_0312 +[MEM @ epoch 315 end] RAM: 19.5/188.4 GiB (10.3%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 316/499 (63% done) --- + [Epoch 316][10/40] loss=0.765949 avg=0.852129 VRAM=38.9GiB | 63.2% done | ETA(epoch): 690s + [Epoch 316][20/40] loss=0.781075 avg=0.844879 VRAM=38.8GiB | 63.3% done | ETA(epoch): 460s + [Epoch 316][30/40] loss=0.775649 avg=0.841900 VRAM=38.9GiB | 63.3% done | ETA(epoch): 230s + [Epoch 316][40/40] loss=0.817793 avg=0.838349 VRAM=38.8GiB | 63.4% done | ETA(epoch): 0s + Train loss: 0.838349 (920.2s) ETA: 3059min + Val loss: 0.844505 [t_0.0-0.2=1.0642 t_0.2-0.4=0.9791 t_0.4-0.6=0.8323 t_0.6-0.8=0.7085 t_0.8-1.0=0.6235] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0316 + Deleted old checkpoint: checkpoint_epoch_0313 +[MEM @ epoch 316 end] RAM: 19.5/188.4 GiB (10.4%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 317/499 (63% done) --- + [Epoch 317][10/40] loss=0.876980 avg=0.845885 VRAM=38.9GiB | 63.4% done | ETA(epoch): 690s + [Epoch 317][20/40] loss=0.885470 avg=0.851644 VRAM=38.8GiB | 63.5% done | ETA(epoch): 460s + [Epoch 317][30/40] loss=0.787464 avg=0.844187 VRAM=38.9GiB | 63.5% done | ETA(epoch): 230s + [Epoch 317][40/40] loss=0.870337 avg=0.840652 VRAM=38.8GiB | 63.6% done | ETA(epoch): 0s + Train loss: 0.840652 (920.3s) ETA: 3042min + Val loss: 0.846410 [t_0.0-0.2=1.0679 t_0.2-0.4=0.9865 t_0.4-0.6=0.8494 t_0.6-0.8=0.7273 t_0.8-1.0=0.6288] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0317 + Deleted old checkpoint: checkpoint_epoch_0314 +[MEM @ epoch 317 end] RAM: 19.5/188.4 GiB (10.4%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 318/499 (64% done) --- + [Epoch 318][10/40] loss=0.946399 avg=0.868375 VRAM=38.9GiB | 63.6% done | ETA(epoch): 690s + [Epoch 318][20/40] loss=0.862403 avg=0.873449 VRAM=38.8GiB | 63.7% done | ETA(epoch): 460s + [Epoch 318][30/40] loss=0.730526 avg=0.859775 VRAM=38.9GiB | 63.7% done | ETA(epoch): 230s + [Epoch 318][40/40] loss=0.809876 avg=0.858916 VRAM=38.8GiB | 63.8% done | ETA(epoch): 0s + Train loss: 0.858916 (920.6s) ETA: 3025min + Val loss: 0.850859 [t_0.0-0.2=1.0687 t_0.2-0.4=0.9643 t_0.4-0.6=0.8367 t_0.6-0.8=0.7204 t_0.8-1.0=0.6378] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0318 + Deleted old checkpoint: checkpoint_epoch_0315 +[MEM @ epoch 318 end] RAM: 19.5/188.4 GiB (10.3%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 319/499 (64% done) --- + [Epoch 319][10/40] loss=0.834183 avg=0.851288 VRAM=38.9GiB | 63.8% done | ETA(epoch): 691s + [Epoch 319][20/40] loss=0.847716 avg=0.852185 VRAM=38.8GiB | 63.9% done | ETA(epoch): 460s + [Epoch 319][30/40] loss=0.820190 avg=0.855062 VRAM=38.9GiB | 63.9% done | ETA(epoch): 230s + [Epoch 319][40/40] loss=0.761037 avg=0.855566 VRAM=38.8GiB | 64.0% done | ETA(epoch): 0s + Train loss: 0.855566 (920.0s) ETA: 3008min + Val loss: 0.842944 [t_0.0-0.2=1.0543 t_0.2-0.4=0.9861 t_0.4-0.6=0.8722 t_0.6-0.8=0.7166 t_0.8-1.0=0.6332] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0319 + Deleted old checkpoint: checkpoint_epoch_0316 +[MEM @ epoch 319 end] RAM: 19.6/188.4 GiB (10.4%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 320/499 (64% done) --- + [Epoch 320][10/40] loss=0.903416 avg=0.891746 VRAM=38.9GiB | 64.0% done | ETA(epoch): 690s + [Epoch 320][20/40] loss=0.947217 avg=0.888678 VRAM=38.8GiB | 64.1% done | ETA(epoch): 460s + [Epoch 320][30/40] loss=0.776329 avg=0.862392 VRAM=38.9GiB | 64.1% done | ETA(epoch): 230s + [Epoch 320][40/40] loss=0.741987 avg=0.852545 VRAM=38.8GiB | 64.2% done | ETA(epoch): 0s + Train loss: 0.852545 (919.7s) ETA: 2992min + Val loss: 0.842571 [t_0.0-0.2=1.0881 t_0.2-0.4=1.0073 t_0.4-0.6=0.8070 t_0.6-0.8=0.7078 t_0.8-1.0=0.6252] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0320 + Deleted old checkpoint: checkpoint_epoch_0317 +[MEM @ epoch 320 end] RAM: 19.5/188.4 GiB (10.4%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 321/499 (64% done) --- + [Epoch 321][10/40] loss=0.857144 avg=0.841296 VRAM=38.9GiB | 64.2% done | ETA(epoch): 690s + [Epoch 321][20/40] loss=0.850836 avg=0.841061 VRAM=38.8GiB | 64.3% done | ETA(epoch): 460s + [Epoch 321][30/40] loss=0.766965 avg=0.837952 VRAM=38.9GiB | 64.3% done | ETA(epoch): 230s + [Epoch 321][40/40] loss=0.813951 avg=0.837561 VRAM=38.8GiB | 64.4% done | ETA(epoch): 0s + Train loss: 0.837561 (920.0s) ETA: 2975min + Val loss: 0.840084 [t_0.0-0.2=1.0655 t_0.2-0.4=1.0142 t_0.4-0.6=0.8411 t_0.6-0.8=0.7008 t_0.8-1.0=0.6312] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0321 + Deleted old checkpoint: checkpoint_epoch_0318 +[MEM @ epoch 321 end] RAM: 19.6/188.4 GiB (10.4%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 322/499 (64% done) --- + [Epoch 322][10/40] loss=0.858982 avg=0.815503 VRAM=38.9GiB | 64.5% done | ETA(epoch): 690s + [Epoch 322][20/40] loss=0.786460 avg=0.831288 VRAM=38.8GiB | 64.5% done | ETA(epoch): 460s + [Epoch 322][30/40] loss=0.768221 avg=0.837774 VRAM=38.9GiB | 64.5% done | ETA(epoch): 230s + [Epoch 322][40/40] loss=0.794895 avg=0.840885 VRAM=38.8GiB | 64.6% done | ETA(epoch): 0s + Train loss: 0.840885 (920.0s) ETA: 2958min + Val loss: 0.859709 [t_0.0-0.2=1.0837 t_0.2-0.4=0.9983 t_0.4-0.6=0.8656 t_0.6-0.8=0.7040 t_0.8-1.0=0.6252] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0322 + Deleted old checkpoint: checkpoint_epoch_0319 +[MEM @ epoch 322 end] RAM: 19.5/188.4 GiB (10.4%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 323/499 (65% done) --- + [Epoch 323][10/40] loss=0.907236 avg=0.846098 VRAM=38.9GiB | 64.6% done | ETA(epoch): 690s + [Epoch 323][20/40] loss=0.788519 avg=0.836371 VRAM=38.8GiB | 64.7% done | ETA(epoch): 460s + [Epoch 323][30/40] loss=0.857649 avg=0.849232 VRAM=38.9GiB | 64.8% done | ETA(epoch): 230s + [Epoch 323][40/40] loss=0.774854 avg=0.854834 VRAM=38.8GiB | 64.8% done | ETA(epoch): 0s + Train loss: 0.854834 (920.4s) ETA: 2941min + Val loss: 0.863744 [t_0.0-0.2=1.0715 t_0.2-0.4=1.0169 t_0.4-0.6=0.8248 t_0.6-0.8=0.7181 t_0.8-1.0=0.6195] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0323 + Deleted old checkpoint: checkpoint_epoch_0320 +[MEM @ epoch 323 end] RAM: 19.6/188.4 GiB (10.4%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 324/499 (65% done) --- + [Epoch 324][10/40] loss=0.994624 avg=0.861546 VRAM=38.9GiB | 64.8% done | ETA(epoch): 690s + [Epoch 324][20/40] loss=0.800413 avg=0.860629 VRAM=38.8GiB | 64.9% done | ETA(epoch): 460s + [Epoch 324][30/40] loss=0.800199 avg=0.850827 VRAM=38.9GiB | 65.0% done | ETA(epoch): 230s + [Epoch 324][40/40] loss=0.823051 avg=0.849552 VRAM=38.8GiB | 65.0% done | ETA(epoch): 0s + Train loss: 0.849552 (920.9s) ETA: 2924min + Val loss: 0.858353 [t_0.0-0.2=1.0746 t_0.2-0.4=0.9996 t_0.4-0.6=0.8396 t_0.6-0.8=0.7198 t_0.8-1.0=0.6488] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0324 + Deleted old checkpoint: checkpoint_epoch_0321 +[MEM @ epoch 324 end] RAM: 19.5/188.4 GiB (10.4%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 325/499 (65% done) --- + [Epoch 325][10/40] loss=0.861258 avg=0.813008 VRAM=38.9GiB | 65.0% done | ETA(epoch): 691s + [Epoch 325][20/40] loss=0.815910 avg=0.821350 VRAM=38.8GiB | 65.1% done | ETA(epoch): 460s + [Epoch 325][30/40] loss=0.870198 avg=0.830522 VRAM=38.9GiB | 65.1% done | ETA(epoch): 230s + [Epoch 325][40/40] loss=0.874311 avg=0.832674 VRAM=38.8GiB | 65.2% done | ETA(epoch): 0s + Train loss: 0.832674 (920.6s) ETA: 2907min + Val loss: 0.839423 [t_0.0-0.2=1.0644 t_0.2-0.4=0.9769 t_0.4-0.6=0.8397 t_0.6-0.8=0.7021 t_0.8-1.0=0.6139] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0325 + Deleted old checkpoint: checkpoint_epoch_0322 +[MEM @ epoch 325 end] RAM: 19.6/188.4 GiB (10.4%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 326/499 (65% done) --- + [Epoch 326][10/40] loss=0.874888 avg=0.856147 VRAM=38.9GiB | 65.2% done | ETA(epoch): 691s + [Epoch 326][20/40] loss=0.820186 avg=0.840061 VRAM=38.8GiB | 65.3% done | ETA(epoch): 461s + [Epoch 326][30/40] loss=0.861005 avg=0.843619 VRAM=38.9GiB | 65.3% done | ETA(epoch): 230s + [Epoch 326][40/40] loss=0.859276 avg=0.847863 VRAM=38.8GiB | 65.4% done | ETA(epoch): 0s + Train loss: 0.847863 (921.3s) ETA: 2890min + Val loss: 0.820504 [t_0.0-0.2=1.0703 t_0.2-0.4=1.0112 t_0.4-0.6=0.8107 t_0.6-0.8=0.7055 t_0.8-1.0=0.6504] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0326 + Deleted old checkpoint: checkpoint_epoch_0323 +[MEM @ epoch 326 end] RAM: 19.6/188.4 GiB (10.4%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 327/499 (65% done) --- + [Epoch 327][10/40] loss=0.850369 avg=0.826331 VRAM=38.9GiB | 65.5% done | ETA(epoch): 691s + [Epoch 327][20/40] loss=0.905917 avg=0.854799 VRAM=38.8GiB | 65.5% done | ETA(epoch): 461s + [Epoch 327][30/40] loss=0.878854 avg=0.851021 VRAM=38.9GiB | 65.5% done | ETA(epoch): 230s + [Epoch 327][40/40] loss=0.890306 avg=0.857962 VRAM=38.8GiB | 65.6% done | ETA(epoch): 0s + Train loss: 0.857962 (921.5s) ETA: 2874min + Val loss: 0.823798 [t_0.0-0.2=1.0695 t_0.2-0.4=0.9746 t_0.4-0.6=0.8568 t_0.6-0.8=0.7060 t_0.8-1.0=0.6303] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0327 + Deleted old checkpoint: checkpoint_epoch_0324 +[MEM @ epoch 327 end] RAM: 19.5/188.4 GiB (10.4%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 328/499 (66% done) --- + [Epoch 328][10/40] loss=0.745668 avg=0.849702 VRAM=38.9GiB | 65.6% done | ETA(epoch): 691s + [Epoch 328][20/40] loss=0.833251 avg=0.852702 VRAM=38.8GiB | 65.7% done | ETA(epoch): 460s + [Epoch 328][30/40] loss=0.831476 avg=0.851189 VRAM=38.9GiB | 65.8% done | ETA(epoch): 230s + [Epoch 328][40/40] loss=0.729435 avg=0.848667 VRAM=38.8GiB | 65.8% done | ETA(epoch): 0s + Train loss: 0.848667 (920.8s) ETA: 2857min + Val loss: 0.874639 [t_0.0-0.2=1.0603 t_0.2-0.4=0.9775 t_0.4-0.6=0.8620 t_0.6-0.8=0.7127 t_0.8-1.0=0.6288] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0328 + Deleted old checkpoint: checkpoint_epoch_0325 +[MEM @ epoch 328 end] RAM: 19.5/188.4 GiB (10.4%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 329/499 (66% done) --- + [Epoch 329][10/40] loss=0.847495 avg=0.840647 VRAM=38.9GiB | 65.8% done | ETA(epoch): 690s + [Epoch 329][20/40] loss=0.850461 avg=0.827953 VRAM=38.8GiB | 65.9% done | ETA(epoch): 460s + [Epoch 329][30/40] loss=0.790788 avg=0.829549 VRAM=38.9GiB | 66.0% done | ETA(epoch): 230s + [Epoch 329][40/40] loss=0.848698 avg=0.838628 VRAM=38.8GiB | 66.0% done | ETA(epoch): 0s + Train loss: 0.838628 (920.4s) ETA: 2840min + Val loss: 0.858828 [t_0.0-0.2=1.0616 t_0.2-0.4=0.9752 t_0.4-0.6=0.8627 t_0.6-0.8=0.7122 t_0.8-1.0=0.6324] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0329 + Deleted old checkpoint: checkpoint_epoch_0326 +[MEM @ epoch 329 end] RAM: 19.5/188.4 GiB (10.3%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 330/499 (66% done) --- + [Epoch 330][10/40] loss=0.830809 avg=0.865952 VRAM=38.9GiB | 66.0% done | ETA(epoch): 690s + [Epoch 330][20/40] loss=0.805631 avg=0.865942 VRAM=38.8GiB | 66.1% done | ETA(epoch): 460s + [Epoch 330][30/40] loss=0.902084 avg=0.862528 VRAM=38.9GiB | 66.1% done | ETA(epoch): 230s + [Epoch 330][40/40] loss=0.912642 avg=0.857780 VRAM=38.8GiB | 66.2% done | ETA(epoch): 0s + Train loss: 0.857780 (921.1s) ETA: 2823min + Val loss: 0.840612 [t_0.0-0.2=1.0676 t_0.2-0.4=0.9852 t_0.4-0.6=0.8269 t_0.6-0.8=0.7147 t_0.8-1.0=0.6413] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0330 + Deleted old checkpoint: checkpoint_epoch_0327 +[MEM @ epoch 330 end] RAM: 19.6/188.4 GiB (10.4%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 331/499 (66% done) --- + [Epoch 331][10/40] loss=0.882294 avg=0.863359 VRAM=38.9GiB | 66.2% done | ETA(epoch): 690s + [Epoch 331][20/40] loss=0.882032 avg=0.860959 VRAM=38.8GiB | 66.3% done | ETA(epoch): 460s + [Epoch 331][30/40] loss=0.879519 avg=0.852468 VRAM=38.9GiB | 66.3% done | ETA(epoch): 230s + [Epoch 331][40/40] loss=0.804096 avg=0.848392 VRAM=38.8GiB | 66.4% done | ETA(epoch): 0s + Train loss: 0.848392 (920.9s) ETA: 2806min + Val loss: 0.838814 [t_0.0-0.2=1.0877 t_0.2-0.4=0.9567 t_0.4-0.6=0.8520 t_0.6-0.8=0.7266 t_0.8-1.0=0.6393] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0331 + Deleted old checkpoint: checkpoint_epoch_0328 +[MEM @ epoch 331 end] RAM: 19.5/188.4 GiB (10.4%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 332/499 (66% done) --- + [Epoch 332][10/40] loss=0.922598 avg=0.843896 VRAM=38.9GiB | 66.5% done | ETA(epoch): 691s + [Epoch 332][20/40] loss=0.842818 avg=0.846063 VRAM=38.8GiB | 66.5% done | ETA(epoch): 460s + [Epoch 332][30/40] loss=0.859203 avg=0.850547 VRAM=38.9GiB | 66.5% done | ETA(epoch): 230s + [Epoch 332][40/40] loss=0.795554 avg=0.854184 VRAM=38.8GiB | 66.6% done | ETA(epoch): 0s + Train loss: 0.854184 (920.8s) ETA: 2789min + Val loss: 0.818070 [t_0.0-0.2=1.0776 t_0.2-0.4=0.9879 t_0.4-0.6=0.7647 t_0.6-0.8=0.7041 t_0.8-1.0=0.6342] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0332 + Deleted old checkpoint: checkpoint_epoch_0329 +[MEM @ epoch 332 end] RAM: 19.5/188.4 GiB (10.4%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 333/499 (67% done) --- + [Epoch 333][10/40] loss=0.888895 avg=0.884231 VRAM=38.9GiB | 66.6% done | ETA(epoch): 690s + [Epoch 333][20/40] loss=0.773928 avg=0.872023 VRAM=38.8GiB | 66.7% done | ETA(epoch): 460s + [Epoch 333][30/40] loss=0.872137 avg=0.856306 VRAM=38.9GiB | 66.8% done | ETA(epoch): 230s + [Epoch 333][40/40] loss=0.810043 avg=0.849196 VRAM=38.8GiB | 66.8% done | ETA(epoch): 0s + Train loss: 0.849196 (920.5s) ETA: 2773min + Val loss: 0.863285 [t_0.0-0.2=1.0687 t_0.2-0.4=0.9528 t_0.4-0.6=0.8399 t_0.6-0.8=0.7470 t_0.8-1.0=0.6323] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0333 + Deleted old checkpoint: checkpoint_epoch_0330 +[MEM @ epoch 333 end] RAM: 19.5/188.4 GiB (10.4%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 334/499 (67% done) --- + [Epoch 334][10/40] loss=0.804839 avg=0.873141 VRAM=38.9GiB | 66.8% done | ETA(epoch): 689s + [Epoch 334][20/40] loss=0.808053 avg=0.865977 VRAM=38.8GiB | 66.9% done | ETA(epoch): 460s + [Epoch 334][30/40] loss=0.828328 avg=0.868213 VRAM=38.9GiB | 67.0% done | ETA(epoch): 230s + [Epoch 334][40/40] loss=0.954865 avg=0.855884 VRAM=38.8GiB | 67.0% done | ETA(epoch): 0s + Train loss: 0.855884 (920.3s) ETA: 2756min + Val loss: 0.870827 [t_0.0-0.2=1.0706 t_0.2-0.4=0.9961 t_0.4-0.6=0.8339 t_0.6-0.8=0.7200 t_0.8-1.0=0.6360] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0334 + Deleted old checkpoint: checkpoint_epoch_0331 +[MEM @ epoch 334 end] RAM: 19.5/188.4 GiB (10.4%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 335/499 (67% done) --- + [Epoch 335][10/40] loss=0.895628 avg=0.848650 VRAM=38.9GiB | 67.0% done | ETA(epoch): 690s + [Epoch 335][20/40] loss=0.859442 avg=0.854679 VRAM=38.8GiB | 67.1% done | ETA(epoch): 460s + [Epoch 335][30/40] loss=0.869242 avg=0.864773 VRAM=38.9GiB | 67.2% done | ETA(epoch): 230s + [Epoch 335][40/40] loss=0.829502 avg=0.848908 VRAM=38.8GiB | 67.2% done | ETA(epoch): 0s + Train loss: 0.848908 (920.7s) ETA: 2739min + Val loss: 0.864069 [t_0.0-0.2=1.0663 t_0.2-0.4=0.9833 t_0.4-0.6=0.8654 t_0.6-0.8=0.7006 t_0.8-1.0=0.6402] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0335 + Deleted old checkpoint: checkpoint_epoch_0332 +[MEM @ epoch 335 end] RAM: 19.6/188.4 GiB (10.4%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 336/499 (67% done) --- + [Epoch 336][10/40] loss=0.830418 avg=0.806341 VRAM=38.9GiB | 67.2% done | ETA(epoch): 691s + [Epoch 336][20/40] loss=0.764735 avg=0.819331 VRAM=38.8GiB | 67.3% done | ETA(epoch): 461s + [Epoch 336][30/40] loss=0.882442 avg=0.835035 VRAM=38.9GiB | 67.3% done | ETA(epoch): 230s + [Epoch 336][40/40] loss=0.770686 avg=0.831332 VRAM=38.8GiB | 67.4% done | ETA(epoch): 0s + Train loss: 0.831332 (921.0s) ETA: 2722min + Val loss: 0.830716 [t_0.0-0.2=1.0716 t_0.2-0.4=0.9892 t_0.4-0.6=0.8667 t_0.6-0.8=0.7139 t_0.8-1.0=0.6316] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0336 + Deleted old checkpoint: checkpoint_epoch_0333 +[MEM @ epoch 336 end] RAM: 19.6/188.4 GiB (10.4%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 337/499 (67% done) --- + [Epoch 337][10/40] loss=0.835293 avg=0.812098 VRAM=38.9GiB | 67.5% done | ETA(epoch): 690s + [Epoch 337][20/40] loss=0.929045 avg=0.833856 VRAM=38.8GiB | 67.5% done | ETA(epoch): 460s + [Epoch 337][30/40] loss=0.728914 avg=0.830639 VRAM=38.9GiB | 67.5% done | ETA(epoch): 230s + [Epoch 337][40/40] loss=0.949683 avg=0.840572 VRAM=38.8GiB | 67.6% done | ETA(epoch): 0s + Train loss: 0.840572 (920.9s) ETA: 2705min + Val loss: 0.844527 [t_0.0-0.2=1.0851 t_0.2-0.4=0.9910 t_0.4-0.6=0.8306 t_0.6-0.8=0.7104 t_0.8-1.0=0.6365] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0337 + Deleted old checkpoint: checkpoint_epoch_0334 +[MEM @ epoch 337 end] RAM: 19.5/188.4 GiB (10.3%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 338/499 (68% done) --- + [Epoch 338][10/40] loss=0.779417 avg=0.867799 VRAM=38.9GiB | 67.7% done | ETA(epoch): 691s + [Epoch 338][20/40] loss=0.850292 avg=0.851594 VRAM=38.8GiB | 67.7% done | ETA(epoch): 461s + [Epoch 338][30/40] loss=0.849831 avg=0.842581 VRAM=38.9GiB | 67.8% done | ETA(epoch): 230s + [Epoch 338][40/40] loss=0.798872 avg=0.841021 VRAM=38.8GiB | 67.8% done | ETA(epoch): 0s + Train loss: 0.841021 (921.8s) ETA: 2688min + Val loss: 0.850294 [t_0.0-0.2=1.0709 t_0.2-0.4=1.0242 t_0.4-0.6=0.8246 t_0.6-0.8=0.7482 t_0.8-1.0=0.6376] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0338 + Deleted old checkpoint: checkpoint_epoch_0335 +[MEM @ epoch 338 end] RAM: 19.6/188.4 GiB (10.4%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 339/499 (68% done) --- + [Epoch 339][10/40] loss=0.851893 avg=0.867888 VRAM=38.9GiB | 67.8% done | ETA(epoch): 691s + [Epoch 339][20/40] loss=0.944167 avg=0.878179 VRAM=38.8GiB | 67.9% done | ETA(epoch): 461s + [Epoch 339][30/40] loss=0.696699 avg=0.861560 VRAM=38.9GiB | 68.0% done | ETA(epoch): 230s + [Epoch 339][40/40] loss=0.848044 avg=0.859231 VRAM=38.8GiB | 68.0% done | ETA(epoch): 0s + Train loss: 0.859231 (921.3s) ETA: 2672min + Val loss: 0.849126 [t_0.0-0.2=1.0605 t_0.2-0.4=1.0014 t_0.4-0.6=0.8714 t_0.6-0.8=0.7133 t_0.8-1.0=0.6062] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0339 + Deleted old checkpoint: checkpoint_epoch_0336 +[MEM @ epoch 339 end] RAM: 19.5/188.4 GiB (10.4%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 340/499 (68% done) --- + [Epoch 340][10/40] loss=0.759234 avg=0.837452 VRAM=38.9GiB | 68.0% done | ETA(epoch): 691s + [Epoch 340][20/40] loss=0.931033 avg=0.852212 VRAM=38.8GiB | 68.1% done | ETA(epoch): 461s + [Epoch 340][30/40] loss=0.893363 avg=0.852301 VRAM=38.9GiB | 68.2% done | ETA(epoch): 230s + [Epoch 340][40/40] loss=0.888902 avg=0.854161 VRAM=38.8GiB | 68.2% done | ETA(epoch): 0s + Train loss: 0.854161 (921.2s) ETA: 2655min + Val loss: 0.883398 [t_0.0-0.2=1.0669 t_0.2-0.4=0.9963 t_0.4-0.6=0.8399 t_0.6-0.8=0.7311 t_0.8-1.0=0.6218] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0340 + Deleted old checkpoint: checkpoint_epoch_0337 +[MEM @ epoch 340 end] RAM: 19.5/188.4 GiB (10.4%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 341/499 (68% done) --- + [Epoch 341][10/40] loss=0.969554 avg=0.868374 VRAM=38.9GiB | 68.2% done | ETA(epoch): 691s + [Epoch 341][20/40] loss=0.792888 avg=0.849610 VRAM=38.8GiB | 68.3% done | ETA(epoch): 460s + [Epoch 341][30/40] loss=0.755924 avg=0.841971 VRAM=38.9GiB | 68.3% done | ETA(epoch): 230s + [Epoch 341][40/40] loss=0.906704 avg=0.841356 VRAM=38.8GiB | 68.4% done | ETA(epoch): 0s + Train loss: 0.841356 (920.9s) ETA: 2638min + Val loss: 0.863217 [t_0.0-0.2=1.0843 t_0.2-0.4=0.9985 t_0.4-0.6=0.8498 t_0.6-0.8=0.6942 t_0.8-1.0=0.6265] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0341 + Deleted old checkpoint: checkpoint_epoch_0338 +[MEM @ epoch 341 end] RAM: 19.6/188.4 GiB (10.4%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 342/499 (68% done) --- + [Epoch 342][10/40] loss=0.909034 avg=0.874560 VRAM=38.9GiB | 68.5% done | ETA(epoch): 690s + [Epoch 342][20/40] loss=0.882760 avg=0.874719 VRAM=38.8GiB | 68.5% done | ETA(epoch): 460s + [Epoch 342][30/40] loss=0.888675 avg=0.875700 VRAM=38.9GiB | 68.5% done | ETA(epoch): 230s + [Epoch 342][40/40] loss=0.764669 avg=0.867617 VRAM=38.8GiB | 68.6% done | ETA(epoch): 0s + Train loss: 0.867617 (920.5s) ETA: 2621min + Val loss: 0.829372 [t_0.0-0.2=1.0792 t_0.2-0.4=0.9652 t_0.4-0.6=0.8172 t_0.6-0.8=0.7149 t_0.8-1.0=0.6285] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0342 + Deleted old checkpoint: checkpoint_epoch_0339 +[MEM @ epoch 342 end] RAM: 19.6/188.4 GiB (10.4%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 343/499 (69% done) --- + [Epoch 343][10/40] loss=0.866023 avg=0.851844 VRAM=38.9GiB | 68.7% done | ETA(epoch): 690s + [Epoch 343][20/40] loss=0.835559 avg=0.861975 VRAM=38.8GiB | 68.7% done | ETA(epoch): 460s + [Epoch 343][30/40] loss=0.887880 avg=0.852453 VRAM=38.9GiB | 68.8% done | ETA(epoch): 230s + [Epoch 343][40/40] loss=0.680907 avg=0.855012 VRAM=38.8GiB | 68.8% done | ETA(epoch): 0s + Train loss: 0.855012 (920.9s) ETA: 2604min + Val loss: 0.836092 [t_0.0-0.2=1.0788 t_0.2-0.4=0.9952 t_0.4-0.6=0.8665 t_0.6-0.8=0.6890 t_0.8-1.0=0.6209] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0343 + Deleted old checkpoint: checkpoint_epoch_0340 +[MEM @ epoch 343 end] RAM: 19.7/188.4 GiB (10.4%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 344/499 (69% done) --- + [Epoch 344][10/40] loss=0.754597 avg=0.832097 VRAM=38.9GiB | 68.8% done | ETA(epoch): 691s + [Epoch 344][20/40] loss=0.902116 avg=0.842323 VRAM=38.8GiB | 68.9% done | ETA(epoch): 461s + [Epoch 344][30/40] loss=0.867940 avg=0.850611 VRAM=38.9GiB | 69.0% done | ETA(epoch): 230s + [Epoch 344][40/40] loss=0.809000 avg=0.851815 VRAM=38.8GiB | 69.0% done | ETA(epoch): 0s + Train loss: 0.851815 (920.6s) ETA: 2588min + Val loss: 0.855773 [t_0.0-0.2=1.0778 t_0.2-0.4=1.0038 t_0.4-0.6=0.8378 t_0.6-0.8=0.6779 t_0.8-1.0=0.6326] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0344 + Deleted old checkpoint: checkpoint_epoch_0341 +[MEM @ epoch 344 end] RAM: 19.6/188.4 GiB (10.4%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 345/499 (69% done) --- + [Epoch 345][10/40] loss=0.866837 avg=0.825428 VRAM=38.9GiB | 69.0% done | ETA(epoch): 690s + [Epoch 345][20/40] loss=0.949789 avg=0.850020 VRAM=38.8GiB | 69.1% done | ETA(epoch): 460s + [Epoch 345][30/40] loss=0.809103 avg=0.846748 VRAM=38.9GiB | 69.2% done | ETA(epoch): 230s + [Epoch 345][40/40] loss=0.911614 avg=0.847035 VRAM=38.8GiB | 69.2% done | ETA(epoch): 0s + Train loss: 0.847035 (920.5s) ETA: 2571min + Val loss: 0.842851 [t_0.0-0.2=1.0848 t_0.2-0.4=0.9822 t_0.4-0.6=0.8481 t_0.6-0.8=0.7155 t_0.8-1.0=0.6325] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0345 + Deleted old checkpoint: checkpoint_epoch_0342 +[MEM @ epoch 345 end] RAM: 19.5/188.4 GiB (10.4%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 346/499 (69% done) --- + [Epoch 346][10/40] loss=0.906668 avg=0.842208 VRAM=38.9GiB | 69.2% done | ETA(epoch): 690s + [Epoch 346][20/40] loss=0.786139 avg=0.850237 VRAM=38.8GiB | 69.3% done | ETA(epoch): 460s + [Epoch 346][30/40] loss=0.879244 avg=0.850341 VRAM=38.9GiB | 69.3% done | ETA(epoch): 230s + [Epoch 346][40/40] loss=0.798844 avg=0.855872 VRAM=38.8GiB | 69.4% done | ETA(epoch): 0s + Train loss: 0.855872 (919.8s) ETA: 2554min + Val loss: 0.846513 [t_0.0-0.2=1.0856 t_0.2-0.4=1.0350 t_0.4-0.6=0.8533 t_0.6-0.8=0.6954 t_0.8-1.0=0.6355] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0346 + Deleted old checkpoint: checkpoint_epoch_0343 +[MEM @ epoch 346 end] RAM: 19.5/188.4 GiB (10.4%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 347/499 (69% done) --- + [Epoch 347][10/40] loss=0.821630 avg=0.861435 VRAM=38.9GiB | 69.5% done | ETA(epoch): 691s + [Epoch 347][20/40] loss=0.906791 avg=0.871171 VRAM=38.8GiB | 69.5% done | ETA(epoch): 460s + [Epoch 347][30/40] loss=0.745473 avg=0.867029 VRAM=38.9GiB | 69.5% done | ETA(epoch): 230s + [Epoch 347][40/40] loss=0.876976 avg=0.866487 VRAM=38.8GiB | 69.6% done | ETA(epoch): 0s + Train loss: 0.866487 (921.3s) ETA: 2537min + Val loss: 0.873472 [t_0.0-0.2=1.0867 t_0.2-0.4=0.9821 t_0.4-0.6=0.8765 t_0.6-0.8=0.7053 t_0.8-1.0=0.6277] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0347 + Deleted old checkpoint: checkpoint_epoch_0344 +[MEM @ epoch 347 end] RAM: 19.5/188.4 GiB (10.4%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 348/499 (70% done) --- + [Epoch 348][10/40] loss=0.841429 avg=0.849843 VRAM=38.9GiB | 69.7% done | ETA(epoch): 691s + [Epoch 348][20/40] loss=0.808456 avg=0.858693 VRAM=38.8GiB | 69.7% done | ETA(epoch): 460s + [Epoch 348][30/40] loss=0.929398 avg=0.864944 VRAM=38.9GiB | 69.8% done | ETA(epoch): 230s + [Epoch 348][40/40] loss=0.739662 avg=0.868285 VRAM=38.8GiB | 69.8% done | ETA(epoch): 0s + Train loss: 0.868285 (921.0s) ETA: 2520min + Val loss: 0.849321 [t_0.0-0.2=1.0743 t_0.2-0.4=0.9678 t_0.4-0.6=0.8499 t_0.6-0.8=0.7033 t_0.8-1.0=0.6242] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0348 + Deleted old checkpoint: checkpoint_epoch_0345 +[MEM @ epoch 348 end] RAM: 19.5/188.4 GiB (10.4%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 349/499 (70% done) --- + [Epoch 349][10/40] loss=0.839307 avg=0.873330 VRAM=38.9GiB | 69.8% done | ETA(epoch): 690s + [Epoch 349][20/40] loss=0.838179 avg=0.842936 VRAM=38.8GiB | 69.9% done | ETA(epoch): 460s + [Epoch 349][30/40] loss=0.831796 avg=0.852805 VRAM=38.9GiB | 70.0% done | ETA(epoch): 230s + [Epoch 349][40/40] loss=0.847513 avg=0.853638 VRAM=38.8GiB | 70.0% done | ETA(epoch): 0s + [MilestoneVis] train_0 step 13999 ✓ + [MilestoneGrid] train_0 → output/lora_rank_128_mlp_H200/milestone_vis/milestone_grid_train_0.png + [MilestoneVis] train_1 step 13999 ✓ + [MilestoneGrid] train_1 → output/lora_rank_128_mlp_H200/milestone_vis/milestone_grid_train_1.png + [MilestoneVis] val_0 step 13999 ✓ + [MilestoneGrid] val_0 → output/lora_rank_128_mlp_H200/milestone_vis/milestone_grid_val_0.png + [MilestoneVis] val_1 step 13999 ✓ + [MilestoneGrid] val_1 → output/lora_rank_128_mlp_H200/milestone_vis/milestone_grid_val_1.png + [MilestoneVis] test_0 step 13999 ✓ + [MilestoneGrid] test_0 → output/lora_rank_128_mlp_H200/milestone_vis/milestone_grid_test_0.png + [MilestoneVis] test_1 step 13999 ✓ + [MilestoneGrid] test_1 → output/lora_rank_128_mlp_H200/milestone_vis/milestone_grid_test_1.png + [MilestoneVis] test_2 step 13999 ✓ + [MilestoneGrid] test_2 → output/lora_rank_128_mlp_H200/milestone_vis/milestone_grid_test_2.png + [MilestoneVis] test_3 step 13999 ✓ + [MilestoneGrid] test_3 → output/lora_rank_128_mlp_H200/milestone_vis/milestone_grid_test_3.png + [MilestoneVis] test_4 step 13999 ✓ + [MilestoneGrid] test_4 → output/lora_rank_128_mlp_H200/milestone_vis/milestone_grid_test_4.png + [MilestoneVis] test_5 step 13999 ✓ + [MilestoneGrid] test_5 → output/lora_rank_128_mlp_H200/milestone_vis/milestone_grid_test_5.png + Train loss: 0.853638 (1675.1s) ETA: 2509min + Val loss: 0.827619 [t_0.0-0.2=1.0740 t_0.2-0.4=1.0074 t_0.4-0.6=0.8517 t_0.6-0.8=0.7245 t_0.8-1.0=0.6214] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0349 + Deleted old checkpoint: checkpoint_epoch_0346 +[MEM @ epoch 349 end] RAM: 20.2/188.4 GiB (10.7%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 350/499 (70% done) --- + [Epoch 350][10/40] loss=0.889752 avg=0.869602 VRAM=38.9GiB | 70.0% done | ETA(epoch): 691s + [Epoch 350][20/40] loss=0.905977 avg=0.861578 VRAM=38.8GiB | 70.1% done | ETA(epoch): 461s + [Epoch 350][30/40] loss=0.733445 avg=0.864916 VRAM=38.9GiB | 70.2% done | ETA(epoch): 230s + [Epoch 350][40/40] loss=0.897452 avg=0.862324 VRAM=38.8GiB | 70.2% done | ETA(epoch): 0s + Train loss: 0.862324 (920.9s) ETA: 2492min + Val loss: 0.863644 [t_0.0-0.2=1.0644 t_0.2-0.4=1.0035 t_0.4-0.6=0.8846 t_0.6-0.8=0.7059 t_0.8-1.0=0.6388] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0350 + Deleted old checkpoint: checkpoint_epoch_0347 +[MEM @ epoch 350 end] RAM: 20.2/188.4 GiB (10.7%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 351/499 (70% done) --- + [Epoch 351][10/40] loss=0.921088 avg=0.887828 VRAM=38.9GiB | 70.2% done | ETA(epoch): 690s + [Epoch 351][20/40] loss=0.731692 avg=0.848575 VRAM=38.8GiB | 70.3% done | ETA(epoch): 460s + [Epoch 351][30/40] loss=0.931313 avg=0.851918 VRAM=38.9GiB | 70.3% done | ETA(epoch): 230s + [Epoch 351][40/40] loss=0.868233 avg=0.852278 VRAM=38.8GiB | 70.4% done | ETA(epoch): 0s + Train loss: 0.852278 (921.0s) ETA: 2475min + Val loss: 0.851392 [t_0.0-0.2=1.0767 t_0.2-0.4=1.0006 t_0.4-0.6=0.8405 t_0.6-0.8=0.6883 t_0.8-1.0=0.6504] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0351 + Deleted old checkpoint: checkpoint_epoch_0348 +[MEM @ epoch 351 end] RAM: 20.1/188.4 GiB (10.7%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 352/499 (70% done) --- + [Epoch 352][10/40] loss=0.806236 avg=0.846993 VRAM=38.9GiB | 70.5% done | ETA(epoch): 691s + [Epoch 352][20/40] loss=0.962594 avg=0.857998 VRAM=38.8GiB | 70.5% done | ETA(epoch): 461s + [Epoch 352][30/40] loss=0.810172 avg=0.851755 VRAM=38.9GiB | 70.5% done | ETA(epoch): 231s + [Epoch 352][40/40] loss=0.921752 avg=0.857972 VRAM=38.8GiB | 70.6% done | ETA(epoch): 0s + Train loss: 0.857972 (921.8s) ETA: 2458min + Val loss: 0.858784 [t_0.0-0.2=1.0745 t_0.2-0.4=0.9936 t_0.4-0.6=0.8595 t_0.6-0.8=0.6983 t_0.8-1.0=0.6292] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0352 + Deleted old checkpoint: checkpoint_epoch_0349 +[MEM @ epoch 352 end] RAM: 20.3/188.4 GiB (10.8%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 353/499 (71% done) --- + [Epoch 353][10/40] loss=0.868651 avg=0.855223 VRAM=38.9GiB | 70.7% done | ETA(epoch): 690s + [Epoch 353][20/40] loss=0.768525 avg=0.871621 VRAM=38.8GiB | 70.7% done | ETA(epoch): 460s + [Epoch 353][30/40] loss=0.839610 avg=0.868960 VRAM=38.9GiB | 70.8% done | ETA(epoch): 230s + [Epoch 353][40/40] loss=0.885129 avg=0.872153 VRAM=38.8GiB | 70.8% done | ETA(epoch): 0s + Train loss: 0.872153 (920.9s) ETA: 2442min + Val loss: 0.854252 [t_0.0-0.2=1.0631 t_0.2-0.4=1.0242 t_0.4-0.6=0.8484 t_0.6-0.8=0.7072 t_0.8-1.0=0.6337] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0353 + Deleted old checkpoint: checkpoint_epoch_0350 +[MEM @ epoch 353 end] RAM: 20.2/188.4 GiB (10.7%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 354/499 (71% done) --- + [Epoch 354][10/40] loss=0.882183 avg=0.850402 VRAM=38.9GiB | 70.9% done | ETA(epoch): 691s + [Epoch 354][20/40] loss=0.767944 avg=0.840156 VRAM=38.8GiB | 70.9% done | ETA(epoch): 461s + [Epoch 354][30/40] loss=0.864976 avg=0.844151 VRAM=38.9GiB | 71.0% done | ETA(epoch): 230s + [Epoch 354][40/40] loss=0.788519 avg=0.847787 VRAM=38.8GiB | 71.0% done | ETA(epoch): 0s + Train loss: 0.847787 (921.4s) ETA: 2425min + Val loss: 0.847783 [t_0.0-0.2=1.0551 t_0.2-0.4=0.9692 t_0.4-0.6=0.8270 t_0.6-0.8=0.7004 t_0.8-1.0=0.6241] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0354 + Deleted old checkpoint: checkpoint_epoch_0351 +[MEM @ epoch 354 end] RAM: 20.1/188.4 GiB (10.7%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 355/499 (71% done) --- + [Epoch 355][10/40] loss=0.929760 avg=0.858772 VRAM=38.9GiB | 71.0% done | ETA(epoch): 691s + [Epoch 355][20/40] loss=0.926325 avg=0.857727 VRAM=38.8GiB | 71.1% done | ETA(epoch): 461s + [Epoch 355][30/40] loss=0.833990 avg=0.856613 VRAM=38.9GiB | 71.2% done | ETA(epoch): 230s + [Epoch 355][40/40] loss=0.788806 avg=0.847492 VRAM=38.8GiB | 71.2% done | ETA(epoch): 0s + Train loss: 0.847492 (921.0s) ETA: 2408min + Val loss: 0.818421 [t_0.0-0.2=1.0606 t_0.2-0.4=0.9922 t_0.4-0.6=0.8666 t_0.6-0.8=0.7079 t_0.8-1.0=0.6288] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0355 + Deleted old checkpoint: checkpoint_epoch_0352 +[MEM @ epoch 355 end] RAM: 20.2/188.4 GiB (10.7%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 356/499 (71% done) --- + [Epoch 356][10/40] loss=0.754843 avg=0.846147 VRAM=38.9GiB | 71.2% done | ETA(epoch): 690s + [Epoch 356][20/40] loss=0.776944 avg=0.832761 VRAM=38.8GiB | 71.3% done | ETA(epoch): 460s + [Epoch 356][30/40] loss=0.901930 avg=0.842898 VRAM=38.9GiB | 71.4% done | ETA(epoch): 230s + [Epoch 356][40/40] loss=0.872967 avg=0.857705 VRAM=38.8GiB | 71.4% done | ETA(epoch): 0s + Train loss: 0.857705 (920.6s) ETA: 2391min + Val loss: 0.839058 [t_0.0-0.2=1.0630 t_0.2-0.4=0.9675 t_0.4-0.6=0.8675 t_0.6-0.8=0.6894 t_0.8-1.0=0.6381] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0356 + Deleted old checkpoint: checkpoint_epoch_0353 +[MEM @ epoch 356 end] RAM: 20.1/188.4 GiB (10.7%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 357/499 (71% done) --- + [Epoch 357][10/40] loss=0.862071 avg=0.866058 VRAM=38.9GiB | 71.5% done | ETA(epoch): 690s + [Epoch 357][20/40] loss=0.761455 avg=0.858537 VRAM=38.8GiB | 71.5% done | ETA(epoch): 460s + [Epoch 357][30/40] loss=0.926123 avg=0.861807 VRAM=38.9GiB | 71.5% done | ETA(epoch): 230s + [Epoch 357][40/40] loss=0.866474 avg=0.856759 VRAM=38.8GiB | 71.6% done | ETA(epoch): 0s + Train loss: 0.856759 (920.6s) ETA: 2374min + Val loss: 0.849727 [t_0.0-0.2=1.0592 t_0.2-0.4=0.9952 t_0.4-0.6=0.8596 t_0.6-0.8=0.7004 t_0.8-1.0=0.6337] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0357 + Deleted old checkpoint: checkpoint_epoch_0354 +[MEM @ epoch 357 end] RAM: 20.2/188.4 GiB (10.7%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 358/499 (72% done) --- + [Epoch 358][10/40] loss=0.994028 avg=0.888148 VRAM=38.9GiB | 71.7% done | ETA(epoch): 690s + [Epoch 358][20/40] loss=0.730850 avg=0.855422 VRAM=38.8GiB | 71.7% done | ETA(epoch): 460s + [Epoch 358][30/40] loss=0.892265 avg=0.864602 VRAM=38.9GiB | 71.8% done | ETA(epoch): 230s + [Epoch 358][40/40] loss=0.878707 avg=0.867144 VRAM=38.8GiB | 71.8% done | ETA(epoch): 0s + Train loss: 0.867144 (920.1s) ETA: 2357min + Val loss: 0.845933 [t_0.0-0.2=1.0742 t_0.2-0.4=0.9557 t_0.4-0.6=0.8597 t_0.6-0.8=0.7078 t_0.8-1.0=0.6223] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0358 + Deleted old checkpoint: checkpoint_epoch_0355 +[MEM @ epoch 358 end] RAM: 20.1/188.4 GiB (10.7%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 359/499 (72% done) --- + [Epoch 359][10/40] loss=0.892364 avg=0.851964 VRAM=38.9GiB | 71.9% done | ETA(epoch): 689s + [Epoch 359][20/40] loss=0.805522 avg=0.854333 VRAM=38.8GiB | 71.9% done | ETA(epoch): 460s + [Epoch 359][30/40] loss=0.890895 avg=0.866515 VRAM=38.9GiB | 72.0% done | ETA(epoch): 230s + [Epoch 359][40/40] loss=0.820573 avg=0.871956 VRAM=38.8GiB | 72.0% done | ETA(epoch): 0s + Train loss: 0.871956 (920.2s) ETA: 2341min + Val loss: 0.844456 [t_0.0-0.2=1.0606 t_0.2-0.4=0.9768 t_0.4-0.6=0.8717 t_0.6-0.8=0.7038 t_0.8-1.0=0.6371] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0359 + Deleted old checkpoint: checkpoint_epoch_0356 +[MEM @ epoch 359 end] RAM: 20.2/188.4 GiB (10.7%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 360/499 (72% done) --- + [Epoch 360][10/40] loss=0.851149 avg=0.877230 VRAM=38.9GiB | 72.0% done | ETA(epoch): 690s + [Epoch 360][20/40] loss=0.797655 avg=0.862763 VRAM=38.8GiB | 72.1% done | ETA(epoch): 460s + [Epoch 360][30/40] loss=0.869099 avg=0.853685 VRAM=38.9GiB | 72.2% done | ETA(epoch): 230s + [Epoch 360][40/40] loss=0.869132 avg=0.848766 VRAM=38.8GiB | 72.2% done | ETA(epoch): 0s + Train loss: 0.848766 (920.9s) ETA: 2324min + Val loss: 0.866486 [t_0.0-0.2=1.0746 t_0.2-0.4=0.9832 t_0.4-0.6=0.8454 t_0.6-0.8=0.7179 t_0.8-1.0=0.6351] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0360 + Deleted old checkpoint: checkpoint_epoch_0357 +[MEM @ epoch 360 end] RAM: 20.2/188.4 GiB (10.7%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 361/499 (72% done) --- + [Epoch 361][10/40] loss=0.897820 avg=0.839631 VRAM=38.9GiB | 72.2% done | ETA(epoch): 691s + [Epoch 361][20/40] loss=0.878117 avg=0.842426 VRAM=38.8GiB | 72.3% done | ETA(epoch): 461s + [Epoch 361][30/40] loss=0.946537 avg=0.846431 VRAM=38.9GiB | 72.4% done | ETA(epoch): 230s + [Epoch 361][40/40] loss=0.908731 avg=0.842110 VRAM=38.8GiB | 72.4% done | ETA(epoch): 0s + Train loss: 0.842110 (921.4s) ETA: 2307min + Val loss: 0.853611 [t_0.0-0.2=1.0580 t_0.2-0.4=1.0088 t_0.4-0.6=0.8428 t_0.6-0.8=0.6750 t_0.8-1.0=0.6508] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0361 + Deleted old checkpoint: checkpoint_epoch_0358 +[MEM @ epoch 361 end] RAM: 20.2/188.4 GiB (10.7%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 362/499 (72% done) --- + [Epoch 362][10/40] loss=0.860041 avg=0.811166 VRAM=38.9GiB | 72.5% done | ETA(epoch): 691s + [Epoch 362][20/40] loss=0.891496 avg=0.822325 VRAM=38.8GiB | 72.5% done | ETA(epoch): 461s + [Epoch 362][30/40] loss=0.831798 avg=0.824184 VRAM=38.9GiB | 72.5% done | ETA(epoch): 230s + [Epoch 362][40/40] loss=0.891521 avg=0.828326 VRAM=38.8GiB | 72.6% done | ETA(epoch): 0s + Train loss: 0.828326 (921.3s) ETA: 2290min + Val loss: 0.818033 [t_0.0-0.2=1.0710 t_0.2-0.4=0.9939 t_0.4-0.6=0.8219 t_0.6-0.8=0.7130 t_0.8-1.0=0.6445] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0362 + Deleted old checkpoint: checkpoint_epoch_0359 +[MEM @ epoch 362 end] RAM: 20.1/188.4 GiB (10.7%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 363/499 (73% done) --- + [Epoch 363][10/40] loss=0.832151 avg=0.866688 VRAM=38.9GiB | 72.7% done | ETA(epoch): 691s + [Epoch 363][20/40] loss=0.856203 avg=0.849026 VRAM=38.8GiB | 72.7% done | ETA(epoch): 460s + [Epoch 363][30/40] loss=0.865634 avg=0.849617 VRAM=38.9GiB | 72.8% done | ETA(epoch): 230s + [Epoch 363][40/40] loss=0.805154 avg=0.843611 VRAM=38.8GiB | 72.8% done | ETA(epoch): 0s + Train loss: 0.843611 (920.3s) ETA: 2273min + Val loss: 0.844510 [t_0.0-0.2=1.0633 t_0.2-0.4=0.9801 t_0.4-0.6=0.8247 t_0.6-0.8=0.7255 t_0.8-1.0=0.6437] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0363 + Deleted old checkpoint: checkpoint_epoch_0360 +[MEM @ epoch 363 end] RAM: 21.1/188.4 GiB (11.2%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 364/499 (73% done) --- + [Epoch 364][10/40] loss=0.930550 avg=0.832923 VRAM=38.9GiB | 72.9% done | ETA(epoch): 691s + [Epoch 364][20/40] loss=0.813486 avg=0.852742 VRAM=38.8GiB | 72.9% done | ETA(epoch): 461s + [Epoch 364][30/40] loss=0.897860 avg=0.855968 VRAM=38.9GiB | 73.0% done | ETA(epoch): 230s + [Epoch 364][40/40] loss=0.901151 avg=0.857425 VRAM=38.8GiB | 73.0% done | ETA(epoch): 0s + Train loss: 0.857425 (921.2s) ETA: 2256min + Val loss: 0.832685 [t_0.0-0.2=1.0682 t_0.2-0.4=1.0140 t_0.4-0.6=0.8469 t_0.6-0.8=0.6874 t_0.8-1.0=0.6472] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0364 + Deleted old checkpoint: checkpoint_epoch_0361 +[MEM @ epoch 364 end] RAM: 20.9/188.4 GiB (11.1%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 365/499 (73% done) --- + [Epoch 365][10/40] loss=0.939781 avg=0.857657 VRAM=38.9GiB | 73.0% done | ETA(epoch): 690s + [Epoch 365][20/40] loss=0.890735 avg=0.863986 VRAM=38.8GiB | 73.1% done | ETA(epoch): 461s + [Epoch 365][30/40] loss=0.852441 avg=0.853252 VRAM=38.9GiB | 73.2% done | ETA(epoch): 230s + [Epoch 365][40/40] loss=0.743875 avg=0.853626 VRAM=38.8GiB | 73.2% done | ETA(epoch): 0s + Train loss: 0.853626 (920.9s) ETA: 2240min + Val loss: 0.824763 [t_0.0-0.2=1.0923 t_0.2-0.4=0.9685 t_0.4-0.6=0.8196 t_0.6-0.8=0.7315 t_0.8-1.0=0.6413] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0365 + Deleted old checkpoint: checkpoint_epoch_0362 +[MEM @ epoch 365 end] RAM: 20.9/188.4 GiB (11.1%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 366/499 (73% done) --- + [Epoch 366][10/40] loss=0.821628 avg=0.834827 VRAM=38.9GiB | 73.2% done | ETA(epoch): 690s + [Epoch 366][20/40] loss=0.918653 avg=0.856452 VRAM=38.8GiB | 73.3% done | ETA(epoch): 460s + [Epoch 366][30/40] loss=0.936131 avg=0.861473 VRAM=38.9GiB | 73.4% done | ETA(epoch): 230s + [Epoch 366][40/40] loss=0.746845 avg=0.852576 VRAM=38.8GiB | 73.4% done | ETA(epoch): 0s + Train loss: 0.852576 (920.1s) ETA: 2223min + Val loss: 0.875721 [t_0.0-0.2=1.0570 t_0.2-0.4=0.9758 t_0.4-0.6=0.8557 t_0.6-0.8=0.7033 t_0.8-1.0=0.6389] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0366 + Deleted old checkpoint: checkpoint_epoch_0363 +[MEM @ epoch 366 end] RAM: 21.0/188.4 GiB (11.1%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 367/499 (73% done) --- + [Epoch 367][10/40] loss=0.841279 avg=0.878261 VRAM=38.9GiB | 73.5% done | ETA(epoch): 690s + [Epoch 367][20/40] loss=0.854879 avg=0.855847 VRAM=38.8GiB | 73.5% done | ETA(epoch): 460s + [Epoch 367][30/40] loss=0.888475 avg=0.860857 VRAM=38.9GiB | 73.6% done | ETA(epoch): 230s + [Epoch 367][40/40] loss=0.830544 avg=0.862858 VRAM=38.8GiB | 73.6% done | ETA(epoch): 0s + Train loss: 0.862858 (920.5s) ETA: 2206min + Val loss: 0.851734 [t_0.0-0.2=1.0730 t_0.2-0.4=1.0057 t_0.4-0.6=0.8486 t_0.6-0.8=0.7065 t_0.8-1.0=0.6332] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0367 + Deleted old checkpoint: checkpoint_epoch_0364 +[MEM @ epoch 367 end] RAM: 20.9/188.4 GiB (11.1%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 368/499 (74% done) --- + [Epoch 368][10/40] loss=0.891173 avg=0.836959 VRAM=38.9GiB | 73.7% done | ETA(epoch): 690s + [Epoch 368][20/40] loss=0.924100 avg=0.854100 VRAM=38.8GiB | 73.7% done | ETA(epoch): 460s + [Epoch 368][30/40] loss=0.786841 avg=0.859059 VRAM=38.9GiB | 73.8% done | ETA(epoch): 230s + [Epoch 368][40/40] loss=0.818884 avg=0.860811 VRAM=38.8GiB | 73.8% done | ETA(epoch): 0s + Train loss: 0.860811 (920.1s) ETA: 2189min + Val loss: 0.835818 [t_0.0-0.2=1.0902 t_0.2-0.4=0.9700 t_0.4-0.6=0.8412 t_0.6-0.8=0.7079 t_0.8-1.0=0.6269] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0368 + Deleted old checkpoint: checkpoint_epoch_0365 +[MEM @ epoch 368 end] RAM: 20.8/188.4 GiB (11.1%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 369/499 (74% done) --- + [Epoch 369][10/40] loss=0.858540 avg=0.853559 VRAM=38.9GiB | 73.9% done | ETA(epoch): 689s + [Epoch 369][20/40] loss=0.965665 avg=0.843093 VRAM=38.8GiB | 73.9% done | ETA(epoch): 460s + [Epoch 369][30/40] loss=0.948036 avg=0.834929 VRAM=38.9GiB | 74.0% done | ETA(epoch): 230s + [Epoch 369][40/40] loss=0.758481 avg=0.830892 VRAM=38.8GiB | 74.0% done | ETA(epoch): 0s + Train loss: 0.830892 (920.0s) ETA: 2172min + Val loss: 0.872494 [t_0.0-0.2=1.0739 t_0.2-0.4=0.9932 t_0.4-0.6=0.8523 t_0.6-0.8=0.7155 t_0.8-1.0=0.6448] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0369 + Deleted old checkpoint: checkpoint_epoch_0366 +[MEM @ epoch 369 end] RAM: 20.9/188.4 GiB (11.1%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 370/499 (74% done) --- + [Epoch 370][10/40] loss=0.850962 avg=0.855971 VRAM=38.9GiB | 74.1% done | ETA(epoch): 690s + [Epoch 370][20/40] loss=0.901797 avg=0.853079 VRAM=38.8GiB | 74.1% done | ETA(epoch): 460s + [Epoch 370][30/40] loss=0.917626 avg=0.850751 VRAM=38.9GiB | 74.2% done | ETA(epoch): 230s + [Epoch 370][40/40] loss=0.937014 avg=0.858683 VRAM=38.8GiB | 74.2% done | ETA(epoch): 0s + Train loss: 0.858683 (920.7s) ETA: 2156min + Val loss: 0.864396 [t_0.0-0.2=1.0809 t_0.2-0.4=0.9690 t_0.4-0.6=0.8774 t_0.6-0.8=0.7059 t_0.8-1.0=0.6391] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0370 + Deleted old checkpoint: checkpoint_epoch_0367 +[MEM @ epoch 370 end] RAM: 21.0/188.4 GiB (11.1%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 371/499 (74% done) --- + [Epoch 371][10/40] loss=0.776354 avg=0.825205 VRAM=38.9GiB | 74.2% done | ETA(epoch): 690s + [Epoch 371][20/40] loss=0.809569 avg=0.836808 VRAM=38.8GiB | 74.3% done | ETA(epoch): 460s + [Epoch 371][30/40] loss=0.891939 avg=0.840208 VRAM=38.9GiB | 74.4% done | ETA(epoch): 230s + [Epoch 371][40/40] loss=0.993900 avg=0.847329 VRAM=38.8GiB | 74.4% done | ETA(epoch): 0s + Train loss: 0.847329 (919.9s) ETA: 2139min + Val loss: 0.869617 [t_0.0-0.2=1.0739 t_0.2-0.4=1.0143 t_0.4-0.6=0.8487 t_0.6-0.8=0.7036 t_0.8-1.0=0.6273] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0371 + Deleted old checkpoint: checkpoint_epoch_0368 +[MEM @ epoch 371 end] RAM: 20.9/188.4 GiB (11.1%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 372/499 (74% done) --- + [Epoch 372][10/40] loss=0.874538 avg=0.872250 VRAM=38.9GiB | 74.5% done | ETA(epoch): 690s + [Epoch 372][20/40] loss=0.875532 avg=0.860822 VRAM=38.8GiB | 74.5% done | ETA(epoch): 460s + [Epoch 372][30/40] loss=0.842642 avg=0.850896 VRAM=38.9GiB | 74.6% done | ETA(epoch): 230s + [Epoch 372][40/40] loss=0.885873 avg=0.847279 VRAM=38.8GiB | 74.6% done | ETA(epoch): 0s + Train loss: 0.847279 (920.3s) ETA: 2122min + Val loss: 0.820075 [t_0.0-0.2=1.0877 t_0.2-0.4=1.0129 t_0.4-0.6=0.8288 t_0.6-0.8=0.6933 t_0.8-1.0=0.6234] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0372 + Deleted old checkpoint: checkpoint_epoch_0369 +[MEM @ epoch 372 end] RAM: 20.8/188.4 GiB (11.1%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 373/499 (75% done) --- + [Epoch 373][10/40] loss=0.854189 avg=0.826748 VRAM=38.9GiB | 74.7% done | ETA(epoch): 691s + [Epoch 373][20/40] loss=0.871620 avg=0.831286 VRAM=38.8GiB | 74.7% done | ETA(epoch): 461s + [Epoch 373][30/40] loss=0.969367 avg=0.838807 VRAM=38.9GiB | 74.8% done | ETA(epoch): 230s + [Epoch 373][40/40] loss=0.868936 avg=0.842417 VRAM=38.8GiB | 74.8% done | ETA(epoch): 0s + Train loss: 0.842417 (920.9s) ETA: 2105min + Val loss: 0.857480 [t_0.0-0.2=1.0653 t_0.2-0.4=1.0280 t_0.4-0.6=0.8322 t_0.6-0.8=0.7122 t_0.8-1.0=0.6385] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0373 + Deleted old checkpoint: checkpoint_epoch_0370 +[MEM @ epoch 373 end] RAM: 20.9/188.4 GiB (11.1%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 374/499 (75% done) --- + [Epoch 374][10/40] loss=0.896383 avg=0.866834 VRAM=38.9GiB | 74.9% done | ETA(epoch): 691s + [Epoch 374][20/40] loss=0.861385 avg=0.866026 VRAM=38.8GiB | 74.9% done | ETA(epoch): 460s + [Epoch 374][30/40] loss=0.874587 avg=0.854124 VRAM=38.9GiB | 75.0% done | ETA(epoch): 230s + [Epoch 374][40/40] loss=0.934357 avg=0.855452 VRAM=38.8GiB | 75.0% done | ETA(epoch): 0s + Train loss: 0.855452 (921.1s) ETA: 2088min + Val loss: 0.878562 [t_0.0-0.2=1.0673 t_0.2-0.4=1.0087 t_0.4-0.6=0.8405 t_0.6-0.8=0.7123 t_0.8-1.0=0.6473] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0374 + Deleted old checkpoint: checkpoint_epoch_0371 +[MEM @ epoch 374 end] RAM: 20.9/188.4 GiB (11.1%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 375/499 (75% done) --- + [Epoch 375][10/40] loss=0.892356 avg=0.817163 VRAM=38.9GiB | 75.0% done | ETA(epoch): 691s + [Epoch 375][20/40] loss=0.843643 avg=0.844257 VRAM=38.8GiB | 75.1% done | ETA(epoch): 461s + [Epoch 375][30/40] loss=0.931394 avg=0.859164 VRAM=38.9GiB | 75.1% done | ETA(epoch): 230s + [Epoch 375][40/40] loss=0.879299 avg=0.854665 VRAM=38.8GiB | 75.2% done | ETA(epoch): 0s + Train loss: 0.854665 (921.2s) ETA: 2072min + Val loss: 0.847909 [t_0.0-0.2=1.0687 t_0.2-0.4=1.0055 t_0.4-0.6=0.8096 t_0.6-0.8=0.7375 t_0.8-1.0=0.6400] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0375 + Deleted old checkpoint: checkpoint_epoch_0372 +[MEM @ epoch 375 end] RAM: 20.9/188.4 GiB (11.1%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 376/499 (75% done) --- + [Epoch 376][10/40] loss=0.784150 avg=0.820790 VRAM=38.9GiB | 75.2% done | ETA(epoch): 691s + [Epoch 376][20/40] loss=0.924328 avg=0.835397 VRAM=38.8GiB | 75.3% done | ETA(epoch): 461s + [Epoch 376][30/40] loss=0.810422 avg=0.835823 VRAM=38.9GiB | 75.3% done | ETA(epoch): 230s + [Epoch 376][40/40] loss=0.803833 avg=0.835295 VRAM=38.8GiB | 75.4% done | ETA(epoch): 0s + Train loss: 0.835295 (921.4s) ETA: 2055min + Val loss: 0.854615 [t_0.0-0.2=1.0728 t_0.2-0.4=1.0209 t_0.4-0.6=0.8620 t_0.6-0.8=0.6973 t_0.8-1.0=0.6195] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0376 + Deleted old checkpoint: checkpoint_epoch_0373 +[MEM @ epoch 376 end] RAM: 20.9/188.4 GiB (11.1%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 377/499 (75% done) --- + [Epoch 377][10/40] loss=0.854178 avg=0.846686 VRAM=38.9GiB | 75.4% done | ETA(epoch): 691s + [Epoch 377][20/40] loss=0.915900 avg=0.849127 VRAM=38.8GiB | 75.5% done | ETA(epoch): 461s + [Epoch 377][30/40] loss=0.893943 avg=0.843360 VRAM=38.9GiB | 75.5% done | ETA(epoch): 230s + [Epoch 377][40/40] loss=0.826918 avg=0.843167 VRAM=38.8GiB | 75.6% done | ETA(epoch): 0s + Train loss: 0.843167 (921.3s) ETA: 2038min + Val loss: 0.835853 [t_0.0-0.2=1.0683 t_0.2-0.4=0.9971 t_0.4-0.6=0.8489 t_0.6-0.8=0.7118 t_0.8-1.0=0.6049] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0377 + Deleted old checkpoint: checkpoint_epoch_0374 +[MEM @ epoch 377 end] RAM: 20.8/188.4 GiB (11.1%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 378/499 (76% done) --- + [Epoch 378][10/40] loss=0.690870 avg=0.829820 VRAM=38.9GiB | 75.6% done | ETA(epoch): 691s + [Epoch 378][20/40] loss=0.845919 avg=0.832173 VRAM=38.8GiB | 75.7% done | ETA(epoch): 461s + [Epoch 378][30/40] loss=0.798816 avg=0.842310 VRAM=38.9GiB | 75.8% done | ETA(epoch): 230s + [Epoch 378][40/40] loss=0.794688 avg=0.836291 VRAM=38.8GiB | 75.8% done | ETA(epoch): 0s + Train loss: 0.836291 (921.2s) ETA: 2021min + Val loss: 0.847096 [t_0.0-0.2=1.0745 t_0.2-0.4=1.0008 t_0.4-0.6=0.8409 t_0.6-0.8=0.7222 t_0.8-1.0=0.6363] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0378 + Deleted old checkpoint: checkpoint_epoch_0375 +[MEM @ epoch 378 end] RAM: 20.8/188.4 GiB (11.1%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 379/499 (76% done) --- + [Epoch 379][10/40] loss=0.831825 avg=0.849713 VRAM=38.9GiB | 75.8% done | ETA(epoch): 690s + [Epoch 379][20/40] loss=0.869020 avg=0.857094 VRAM=38.8GiB | 75.9% done | ETA(epoch): 460s + [Epoch 379][30/40] loss=0.931578 avg=0.870547 VRAM=38.9GiB | 75.9% done | ETA(epoch): 230s + [Epoch 379][40/40] loss=0.874501 avg=0.877061 VRAM=38.8GiB | 76.0% done | ETA(epoch): 0s + Train loss: 0.877061 (920.6s) ETA: 2004min + Val loss: 0.855392 [t_0.0-0.2=1.0553 t_0.2-0.4=0.9737 t_0.4-0.6=0.8733 t_0.6-0.8=0.7200 t_0.8-1.0=0.6429] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0379 + Deleted old checkpoint: checkpoint_epoch_0376 +[MEM @ epoch 379 end] RAM: 20.9/188.4 GiB (11.1%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 380/499 (76% done) --- + [Epoch 380][10/40] loss=0.821213 avg=0.873576 VRAM=38.9GiB | 76.0% done | ETA(epoch): 690s + [Epoch 380][20/40] loss=0.898436 avg=0.868097 VRAM=38.8GiB | 76.1% done | ETA(epoch): 460s + [Epoch 380][30/40] loss=0.827274 avg=0.858538 VRAM=38.9GiB | 76.1% done | ETA(epoch): 230s + [Epoch 380][40/40] loss=0.901508 avg=0.848421 VRAM=38.8GiB | 76.2% done | ETA(epoch): 0s + Train loss: 0.848421 (921.0s) ETA: 1988min + Val loss: 0.857570 [t_0.0-0.2=1.0771 t_0.2-0.4=0.9765 t_0.4-0.6=0.8873 t_0.6-0.8=0.7139 t_0.8-1.0=0.6210] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0380 + Deleted old checkpoint: checkpoint_epoch_0377 +[MEM @ epoch 380 end] RAM: 20.9/188.4 GiB (11.1%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 381/499 (76% done) --- + [Epoch 381][10/40] loss=0.868677 avg=0.881925 VRAM=38.9GiB | 76.2% done | ETA(epoch): 691s + [Epoch 381][20/40] loss=0.857735 avg=0.873523 VRAM=38.8GiB | 76.3% done | ETA(epoch): 460s + [Epoch 381][30/40] loss=0.868762 avg=0.861475 VRAM=38.9GiB | 76.3% done | ETA(epoch): 230s + [Epoch 381][40/40] loss=0.859377 avg=0.858764 VRAM=38.8GiB | 76.4% done | ETA(epoch): 0s + Train loss: 0.858764 (921.0s) ETA: 1971min + Val loss: 0.852841 [t_0.0-0.2=1.0770 t_0.2-0.4=1.0069 t_0.4-0.6=0.8576 t_0.6-0.8=0.7037 t_0.8-1.0=0.6304] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0381 + Deleted old checkpoint: checkpoint_epoch_0378 +[MEM @ epoch 381 end] RAM: 20.9/188.4 GiB (11.1%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 382/499 (76% done) --- + [Epoch 382][10/40] loss=0.783085 avg=0.846761 VRAM=38.9GiB | 76.4% done | ETA(epoch): 691s + [Epoch 382][20/40] loss=0.936990 avg=0.871045 VRAM=38.8GiB | 76.5% done | ETA(epoch): 461s + [Epoch 382][30/40] loss=0.779609 avg=0.863373 VRAM=38.9GiB | 76.5% done | ETA(epoch): 230s + [Epoch 382][40/40] loss=0.805659 avg=0.858816 VRAM=38.8GiB | 76.6% done | ETA(epoch): 0s + Train loss: 0.858816 (921.6s) ETA: 1954min + Val loss: 0.868645 [t_0.0-0.2=1.0622 t_0.2-0.4=0.9985 t_0.4-0.6=0.8564 t_0.6-0.8=0.7137 t_0.8-1.0=0.6279] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0382 + Deleted old checkpoint: checkpoint_epoch_0379 +[MEM @ epoch 382 end] RAM: 20.9/188.4 GiB (11.1%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 383/499 (77% done) --- + [Epoch 383][10/40] loss=0.895340 avg=0.883723 VRAM=38.9GiB | 76.6% done | ETA(epoch): 691s + [Epoch 383][20/40] loss=0.829426 avg=0.882746 VRAM=38.8GiB | 76.7% done | ETA(epoch): 461s + [Epoch 383][30/40] loss=0.872666 avg=0.876243 VRAM=38.9GiB | 76.8% done | ETA(epoch): 230s + [Epoch 383][40/40] loss=0.911398 avg=0.876412 VRAM=38.8GiB | 76.8% done | ETA(epoch): 0s + Train loss: 0.876412 (921.7s) ETA: 1937min + Val loss: 0.844320 [t_0.0-0.2=1.0760 t_0.2-0.4=1.0119 t_0.4-0.6=0.8472 t_0.6-0.8=0.7268 t_0.8-1.0=0.6319] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0383 + Deleted old checkpoint: checkpoint_epoch_0380 +[MEM @ epoch 383 end] RAM: 20.9/188.4 GiB (11.1%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 384/499 (77% done) --- + [Epoch 384][10/40] loss=0.853513 avg=0.845266 VRAM=38.9GiB | 76.8% done | ETA(epoch): 691s + [Epoch 384][20/40] loss=0.828539 avg=0.855687 VRAM=38.8GiB | 76.9% done | ETA(epoch): 461s + [Epoch 384][30/40] loss=0.823400 avg=0.857558 VRAM=38.9GiB | 77.0% done | ETA(epoch): 230s + [Epoch 384][40/40] loss=0.833270 avg=0.857351 VRAM=38.8GiB | 77.0% done | ETA(epoch): 0s + Train loss: 0.857351 (921.8s) ETA: 1921min + Val loss: 0.871026 [t_0.0-0.2=1.0764 t_0.2-0.4=1.0088 t_0.4-0.6=0.8370 t_0.6-0.8=0.7015 t_0.8-1.0=0.6303] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0384 + Deleted old checkpoint: checkpoint_epoch_0381 +[MEM @ epoch 384 end] RAM: 20.9/188.4 GiB (11.1%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 385/499 (77% done) --- + [Epoch 385][10/40] loss=0.816090 avg=0.866031 VRAM=38.9GiB | 77.0% done | ETA(epoch): 691s + [Epoch 385][20/40] loss=0.836394 avg=0.860360 VRAM=38.8GiB | 77.1% done | ETA(epoch): 461s + [Epoch 385][30/40] loss=0.974739 avg=0.869377 VRAM=38.9GiB | 77.1% done | ETA(epoch): 230s + [Epoch 385][40/40] loss=0.742331 avg=0.865934 VRAM=38.8GiB | 77.2% done | ETA(epoch): 0s + Train loss: 0.865934 (921.2s) ETA: 1904min + Val loss: 0.890722 [t_0.0-0.2=1.0670 t_0.2-0.4=0.9789 t_0.4-0.6=0.8657 t_0.6-0.8=0.7120 t_0.8-1.0=0.6510] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0385 + Deleted old checkpoint: checkpoint_epoch_0382 +[MEM @ epoch 385 end] RAM: 20.9/188.4 GiB (11.1%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 386/499 (77% done) --- + [Epoch 386][10/40] loss=0.893741 avg=0.873035 VRAM=38.9GiB | 77.2% done | ETA(epoch): 691s + [Epoch 386][20/40] loss=0.818724 avg=0.848510 VRAM=38.8GiB | 77.3% done | ETA(epoch): 460s + [Epoch 386][30/40] loss=0.929578 avg=0.849335 VRAM=38.9GiB | 77.3% done | ETA(epoch): 230s + [Epoch 386][40/40] loss=0.906691 avg=0.848634 VRAM=38.8GiB | 77.4% done | ETA(epoch): 0s + Train loss: 0.848634 (920.8s) ETA: 1887min + Val loss: 0.838821 [t_0.0-0.2=1.0749 t_0.2-0.4=1.0081 t_0.4-0.6=0.8466 t_0.6-0.8=0.7266 t_0.8-1.0=0.6272] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0386 + Deleted old checkpoint: checkpoint_epoch_0383 +[MEM @ epoch 386 end] RAM: 20.9/188.4 GiB (11.1%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 387/499 (77% done) --- + [Epoch 387][10/40] loss=0.822249 avg=0.827058 VRAM=38.9GiB | 77.5% done | ETA(epoch): 691s + [Epoch 387][20/40] loss=0.942372 avg=0.838301 VRAM=38.8GiB | 77.5% done | ETA(epoch): 460s + [Epoch 387][30/40] loss=0.931273 avg=0.851076 VRAM=38.9GiB | 77.5% done | ETA(epoch): 230s + [Epoch 387][40/40] loss=0.735905 avg=0.844710 VRAM=38.8GiB | 77.6% done | ETA(epoch): 0s + Train loss: 0.844710 (920.6s) ETA: 1870min + Val loss: 0.854219 [t_0.0-0.2=1.0612 t_0.2-0.4=1.0161 t_0.4-0.6=0.8500 t_0.6-0.8=0.6929 t_0.8-1.0=0.6259] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0387 + Deleted old checkpoint: checkpoint_epoch_0384 +[MEM @ epoch 387 end] RAM: 20.9/188.4 GiB (11.1%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 388/499 (78% done) --- + [Epoch 388][10/40] loss=0.909346 avg=0.867816 VRAM=38.9GiB | 77.6% done | ETA(epoch): 690s + [Epoch 388][20/40] loss=0.799582 avg=0.844198 VRAM=38.8GiB | 77.7% done | ETA(epoch): 461s + [Epoch 388][30/40] loss=0.777769 avg=0.838015 VRAM=38.9GiB | 77.8% done | ETA(epoch): 230s + [Epoch 388][40/40] loss=1.001230 avg=0.848821 VRAM=38.8GiB | 77.8% done | ETA(epoch): 0s + Train loss: 0.848821 (921.3s) ETA: 1853min + Val loss: 0.851754 [t_0.0-0.2=1.0699 t_0.2-0.4=0.9898 t_0.4-0.6=0.8524 t_0.6-0.8=0.7053 t_0.8-1.0=0.6288] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0388 + Deleted old checkpoint: checkpoint_epoch_0385 +[MEM @ epoch 388 end] RAM: 20.9/188.4 GiB (11.1%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 389/499 (78% done) --- + [Epoch 389][10/40] loss=0.910225 avg=0.859232 VRAM=38.9GiB | 77.8% done | ETA(epoch): 691s + [Epoch 389][20/40] loss=0.800874 avg=0.864006 VRAM=38.8GiB | 77.9% done | ETA(epoch): 461s + [Epoch 389][30/40] loss=0.875930 avg=0.852312 VRAM=38.9GiB | 78.0% done | ETA(epoch): 230s + [Epoch 389][40/40] loss=0.890278 avg=0.854820 VRAM=38.8GiB | 78.0% done | ETA(epoch): 0s + Train loss: 0.854820 (921.5s) ETA: 1837min + Val loss: 0.854539 [t_0.0-0.2=1.0633 t_0.2-0.4=0.9975 t_0.4-0.6=0.8635 t_0.6-0.8=0.7100 t_0.8-1.0=0.6406] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0389 + Deleted old checkpoint: checkpoint_epoch_0386 +[MEM @ epoch 389 end] RAM: 20.9/188.4 GiB (11.1%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 390/499 (78% done) --- + [Epoch 390][10/40] loss=0.845545 avg=0.851030 VRAM=38.9GiB | 78.0% done | ETA(epoch): 692s + [Epoch 390][20/40] loss=0.864528 avg=0.858955 VRAM=38.8GiB | 78.1% done | ETA(epoch): 461s + [Epoch 390][30/40] loss=0.951956 avg=0.866712 VRAM=38.9GiB | 78.1% done | ETA(epoch): 231s + [Epoch 390][40/40] loss=0.878650 avg=0.865680 VRAM=38.8GiB | 78.2% done | ETA(epoch): 0s + Train loss: 0.865680 (922.1s) ETA: 1820min + Val loss: 0.878891 [t_0.0-0.2=1.0621 t_0.2-0.4=1.0313 t_0.4-0.6=0.8255 t_0.6-0.8=0.7149 t_0.8-1.0=0.6422] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0390 + Deleted old checkpoint: checkpoint_epoch_0387 +[MEM @ epoch 390 end] RAM: 20.9/188.4 GiB (11.1%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 391/499 (78% done) --- + [Epoch 391][10/40] loss=0.793005 avg=0.883448 VRAM=38.9GiB | 78.2% done | ETA(epoch): 691s + [Epoch 391][20/40] loss=0.904154 avg=0.867199 VRAM=38.8GiB | 78.3% done | ETA(epoch): 461s + [Epoch 391][30/40] loss=0.934302 avg=0.871487 VRAM=38.9GiB | 78.3% done | ETA(epoch): 230s + [Epoch 391][40/40] loss=0.816145 avg=0.861668 VRAM=38.8GiB | 78.4% done | ETA(epoch): 0s + Train loss: 0.861668 (921.6s) ETA: 1803min + Val loss: 0.843311 [t_0.0-0.2=1.0819 t_0.2-0.4=0.9683 t_0.4-0.6=0.8507 t_0.6-0.8=0.7108 t_0.8-1.0=0.6231] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0391 + Deleted old checkpoint: checkpoint_epoch_0388 +[MEM @ epoch 391 end] RAM: 20.9/188.4 GiB (11.1%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 392/499 (78% done) --- + [Epoch 392][10/40] loss=0.854985 avg=0.846071 VRAM=38.9GiB | 78.5% done | ETA(epoch): 691s + [Epoch 392][20/40] loss=0.845774 avg=0.836641 VRAM=38.8GiB | 78.5% done | ETA(epoch): 461s + [Epoch 392][30/40] loss=0.916552 avg=0.857442 VRAM=38.9GiB | 78.5% done | ETA(epoch): 230s + [Epoch 392][40/40] loss=0.909769 avg=0.853609 VRAM=38.8GiB | 78.6% done | ETA(epoch): 0s + Train loss: 0.853609 (921.5s) ETA: 1786min + Val loss: 0.864121 [t_0.0-0.2=1.0754 t_0.2-0.4=0.9971 t_0.4-0.6=0.8272 t_0.6-0.8=0.7093 t_0.8-1.0=0.6333] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0392 + Deleted old checkpoint: checkpoint_epoch_0389 +[MEM @ epoch 392 end] RAM: 20.9/188.4 GiB (11.1%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 393/499 (79% done) --- + [Epoch 393][10/40] loss=0.807236 avg=0.835080 VRAM=38.9GiB | 78.6% done | ETA(epoch): 691s + [Epoch 393][20/40] loss=0.788518 avg=0.840959 VRAM=38.8GiB | 78.7% done | ETA(epoch): 461s + [Epoch 393][30/40] loss=0.797658 avg=0.837360 VRAM=38.9GiB | 78.8% done | ETA(epoch): 230s + [Epoch 393][40/40] loss=0.717028 avg=0.838350 VRAM=38.8GiB | 78.8% done | ETA(epoch): 0s + Train loss: 0.838350 (921.6s) ETA: 1770min + Val loss: 0.853709 [t_0.0-0.2=1.0527 t_0.2-0.4=1.0058 t_0.4-0.6=0.8377 t_0.6-0.8=0.7055 t_0.8-1.0=0.6456] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0393 + Deleted old checkpoint: checkpoint_epoch_0390 +[MEM @ epoch 393 end] RAM: 20.9/188.4 GiB (11.1%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 394/499 (79% done) --- + [Epoch 394][10/40] loss=0.847888 avg=0.846689 VRAM=38.9GiB | 78.8% done | ETA(epoch): 691s + [Epoch 394][20/40] loss=0.987628 avg=0.853592 VRAM=38.8GiB | 78.9% done | ETA(epoch): 461s + [Epoch 394][30/40] loss=0.900753 avg=0.855160 VRAM=38.9GiB | 79.0% done | ETA(epoch): 230s + [Epoch 394][40/40] loss=0.849845 avg=0.850318 VRAM=38.8GiB | 79.0% done | ETA(epoch): 0s + Train loss: 0.850318 (921.6s) ETA: 1753min + Val loss: 0.845852 [t_0.0-0.2=1.0888 t_0.2-0.4=0.9610 t_0.4-0.6=0.8257 t_0.6-0.8=0.6946 t_0.8-1.0=0.6205] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0394 + Deleted old checkpoint: checkpoint_epoch_0391 +[MEM @ epoch 394 end] RAM: 20.9/188.4 GiB (11.1%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 395/499 (79% done) --- + [Epoch 395][10/40] loss=0.849032 avg=0.860264 VRAM=38.9GiB | 79.0% done | ETA(epoch): 691s + [Epoch 395][20/40] loss=0.912685 avg=0.873934 VRAM=38.8GiB | 79.1% done | ETA(epoch): 460s + [Epoch 395][30/40] loss=0.841246 avg=0.856223 VRAM=38.9GiB | 79.1% done | ETA(epoch): 230s + [Epoch 395][40/40] loss=0.869892 avg=0.853279 VRAM=38.8GiB | 79.2% done | ETA(epoch): 0s + Train loss: 0.853279 (920.5s) ETA: 1736min + Val loss: 0.867337 [t_0.0-0.2=1.0675 t_0.2-0.4=1.0239 t_0.4-0.6=0.8312 t_0.6-0.8=0.7061 t_0.8-1.0=0.6472] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0395 + Deleted old checkpoint: checkpoint_epoch_0392 +[MEM @ epoch 395 end] RAM: 20.9/188.4 GiB (11.1%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 396/499 (79% done) --- + [Epoch 396][10/40] loss=0.917180 avg=0.839122 VRAM=38.9GiB | 79.2% done | ETA(epoch): 690s + [Epoch 396][20/40] loss=0.915038 avg=0.841762 VRAM=38.8GiB | 79.3% done | ETA(epoch): 460s + [Epoch 396][30/40] loss=0.872560 avg=0.839624 VRAM=38.9GiB | 79.3% done | ETA(epoch): 230s + [Epoch 396][40/40] loss=0.830556 avg=0.837895 VRAM=38.8GiB | 79.4% done | ETA(epoch): 0s + Train loss: 0.837895 (920.6s) ETA: 1719min + Val loss: 0.847928 [t_0.0-0.2=1.0526 t_0.2-0.4=1.0174 t_0.4-0.6=0.8107 t_0.6-0.8=0.7343 t_0.8-1.0=0.6238] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0396 + Deleted old checkpoint: checkpoint_epoch_0393 +[MEM @ epoch 396 end] RAM: 20.9/188.4 GiB (11.1%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 397/499 (79% done) --- + [Epoch 397][10/40] loss=0.821952 avg=0.832396 VRAM=38.9GiB | 79.5% done | ETA(epoch): 690s + [Epoch 397][20/40] loss=0.886247 avg=0.842861 VRAM=38.8GiB | 79.5% done | ETA(epoch): 460s + [Epoch 397][30/40] loss=0.825591 avg=0.840714 VRAM=38.9GiB | 79.5% done | ETA(epoch): 230s + [Epoch 397][40/40] loss=0.929560 avg=0.854472 VRAM=38.8GiB | 79.6% done | ETA(epoch): 0s + Train loss: 0.854472 (920.8s) ETA: 1703min + Val loss: 0.828767 [t_0.0-0.2=1.0774 t_0.2-0.4=0.9678 t_0.4-0.6=0.8557 t_0.6-0.8=0.7022 t_0.8-1.0=0.6266] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0397 + Deleted old checkpoint: checkpoint_epoch_0394 +[MEM @ epoch 397 end] RAM: 20.9/188.4 GiB (11.1%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 398/499 (80% done) --- + [Epoch 398][10/40] loss=0.909681 avg=0.871593 VRAM=38.9GiB | 79.7% done | ETA(epoch): 691s + [Epoch 398][20/40] loss=0.782949 avg=0.862526 VRAM=38.8GiB | 79.7% done | ETA(epoch): 460s + [Epoch 398][30/40] loss=0.806314 avg=0.852241 VRAM=38.9GiB | 79.8% done | ETA(epoch): 230s + [Epoch 398][40/40] loss=0.871435 avg=0.844722 VRAM=38.8GiB | 79.8% done | ETA(epoch): 0s + Train loss: 0.844722 (920.7s) ETA: 1686min + Val loss: 0.848172 [t_0.0-0.2=1.0750 t_0.2-0.4=0.9776 t_0.4-0.6=0.8280 t_0.6-0.8=0.6921 t_0.8-1.0=0.6154] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0398 + Deleted old checkpoint: checkpoint_epoch_0395 +[MEM @ epoch 398 end] RAM: 20.9/188.4 GiB (11.1%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 399/499 (80% done) --- + [Epoch 399][10/40] loss=0.833981 avg=0.842699 VRAM=38.9GiB | 79.8% done | ETA(epoch): 691s + [Epoch 399][20/40] loss=0.894152 avg=0.833694 VRAM=38.8GiB | 79.9% done | ETA(epoch): 461s + [Epoch 399][30/40] loss=0.792050 avg=0.841053 VRAM=38.9GiB | 80.0% done | ETA(epoch): 230s + [Epoch 399][40/40] loss=0.818314 avg=0.848023 VRAM=38.8GiB | 80.0% done | ETA(epoch): 0s + [MilestoneVis] train_0 step 15999 ✓ + [MilestoneGrid] train_0 → output/lora_rank_128_mlp_H200/milestone_vis/milestone_grid_train_0.png + [MilestoneVis] train_1 step 15999 ✓ + [MilestoneGrid] train_1 → output/lora_rank_128_mlp_H200/milestone_vis/milestone_grid_train_1.png + [MilestoneVis] val_0 step 15999 ✓ + [MilestoneGrid] val_0 → output/lora_rank_128_mlp_H200/milestone_vis/milestone_grid_val_0.png + [MilestoneVis] val_1 step 15999 ✓ + [MilestoneGrid] val_1 → output/lora_rank_128_mlp_H200/milestone_vis/milestone_grid_val_1.png + [MilestoneVis] test_0 step 15999 ✓ + [MilestoneGrid] test_0 → output/lora_rank_128_mlp_H200/milestone_vis/milestone_grid_test_0.png + [MilestoneVis] test_1 step 15999 ✓ + [MilestoneGrid] test_1 → output/lora_rank_128_mlp_H200/milestone_vis/milestone_grid_test_1.png + [MilestoneVis] test_2 step 15999 ✓ + [MilestoneGrid] test_2 → output/lora_rank_128_mlp_H200/milestone_vis/milestone_grid_test_2.png + [MilestoneVis] test_3 step 15999 ✓ + [MilestoneGrid] test_3 → output/lora_rank_128_mlp_H200/milestone_vis/milestone_grid_test_3.png + [MilestoneVis] test_4 step 15999 ✓ + [MilestoneGrid] test_4 → output/lora_rank_128_mlp_H200/milestone_vis/milestone_grid_test_4.png + [MilestoneVis] test_5 step 15999 ✓ + [MilestoneGrid] test_5 → output/lora_rank_128_mlp_H200/milestone_vis/milestone_grid_test_5.png + Train loss: 0.848023 (1676.9s) ETA: 1672min + Val loss: 0.824077 [t_0.0-0.2=1.0455 t_0.2-0.4=1.0046 t_0.4-0.6=0.9088 t_0.6-0.8=0.7199 t_0.8-1.0=0.6396] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0399 + Deleted old checkpoint: checkpoint_epoch_0396 +[MEM @ epoch 399 end] RAM: 21.6/188.4 GiB (11.4%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 400/499 (80% done) --- + [Epoch 400][10/40] loss=0.740920 avg=0.838743 VRAM=38.9GiB | 80.0% done | ETA(epoch): 690s + [Epoch 400][20/40] loss=0.866531 avg=0.837510 VRAM=38.8GiB | 80.1% done | ETA(epoch): 460s + [Epoch 400][30/40] loss=0.726544 avg=0.838179 VRAM=38.9GiB | 80.2% done | ETA(epoch): 230s + [Epoch 400][40/40] loss=0.880890 avg=0.845951 VRAM=38.8GiB | 80.2% done | ETA(epoch): 0s + Train loss: 0.845951 (920.6s) ETA: 1655min + Val loss: 0.820275 [t_0.0-0.2=1.0477 t_0.2-0.4=1.0049 t_0.4-0.6=0.8232 t_0.6-0.8=0.6938 t_0.8-1.0=0.6263] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0400 + Deleted old checkpoint: checkpoint_epoch_0397 +[MEM @ epoch 400 end] RAM: 21.5/188.4 GiB (11.4%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 401/499 (80% done) --- + [Epoch 401][10/40] loss=0.837830 avg=0.839404 VRAM=38.9GiB | 80.2% done | ETA(epoch): 690s + [Epoch 401][20/40] loss=1.001535 avg=0.838629 VRAM=38.8GiB | 80.3% done | ETA(epoch): 460s + [Epoch 401][30/40] loss=0.725190 avg=0.829457 VRAM=38.9GiB | 80.3% done | ETA(epoch): 230s + [Epoch 401][40/40] loss=0.847360 avg=0.827506 VRAM=38.8GiB | 80.4% done | ETA(epoch): 0s + Train loss: 0.827506 (920.6s) ETA: 1639min + Val loss: 0.858189 [t_0.0-0.2=1.0647 t_0.2-0.4=0.9730 t_0.4-0.6=0.8465 t_0.6-0.8=0.6909 t_0.8-1.0=0.6324] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0401 + Deleted old checkpoint: checkpoint_epoch_0398 +[MEM @ epoch 401 end] RAM: 21.5/188.4 GiB (11.4%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 402/499 (80% done) --- + [Epoch 402][10/40] loss=0.788001 avg=0.839498 VRAM=38.9GiB | 80.5% done | ETA(epoch): 690s + [Epoch 402][20/40] loss=0.785540 avg=0.830751 VRAM=38.8GiB | 80.5% done | ETA(epoch): 460s + [Epoch 402][30/40] loss=0.897865 avg=0.839580 VRAM=38.9GiB | 80.5% done | ETA(epoch): 230s + [Epoch 402][40/40] loss=0.886328 avg=0.838806 VRAM=38.8GiB | 80.6% done | ETA(epoch): 0s + Train loss: 0.838806 (921.1s) ETA: 1622min + Val loss: 0.827318 [t_0.0-0.2=1.0828 t_0.2-0.4=0.9741 t_0.4-0.6=0.8159 t_0.6-0.8=0.7098 t_0.8-1.0=0.6180] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0402 + Deleted old checkpoint: checkpoint_epoch_0399 +[MEM @ epoch 402 end] RAM: 21.6/188.4 GiB (11.5%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 403/499 (81% done) --- + [Epoch 403][10/40] loss=0.872746 avg=0.859466 VRAM=38.9GiB | 80.7% done | ETA(epoch): 690s + [Epoch 403][20/40] loss=0.847568 avg=0.860761 VRAM=38.8GiB | 80.7% done | ETA(epoch): 460s + [Epoch 403][30/40] loss=0.922362 avg=0.853170 VRAM=38.9GiB | 80.8% done | ETA(epoch): 230s + [Epoch 403][40/40] loss=0.796314 avg=0.854101 VRAM=38.8GiB | 80.8% done | ETA(epoch): 0s + Train loss: 0.854101 (921.0s) ETA: 1605min + Val loss: 0.870200 [t_0.0-0.2=1.0740 t_0.2-0.4=0.9702 t_0.4-0.6=0.8567 t_0.6-0.8=0.7164 t_0.8-1.0=0.6194] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0403 + Deleted old checkpoint: checkpoint_epoch_0400 +[MEM @ epoch 403 end] RAM: 21.7/188.4 GiB (11.5%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 404/499 (81% done) --- + [Epoch 404][10/40] loss=0.885665 avg=0.860281 VRAM=38.9GiB | 80.8% done | ETA(epoch): 691s + [Epoch 404][20/40] loss=0.957669 avg=0.858120 VRAM=38.8GiB | 80.9% done | ETA(epoch): 461s + [Epoch 404][30/40] loss=0.933756 avg=0.855223 VRAM=38.9GiB | 81.0% done | ETA(epoch): 230s + [Epoch 404][40/40] loss=0.830575 avg=0.847316 VRAM=38.8GiB | 81.0% done | ETA(epoch): 0s + Train loss: 0.847316 (921.5s) ETA: 1588min + Val loss: 0.810442 [t_0.0-0.2=1.0852 t_0.2-0.4=1.0098 t_0.4-0.6=0.8421 t_0.6-0.8=0.7058 t_0.8-1.0=0.6366] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0404 (BEST) + Deleted old checkpoint: checkpoint_epoch_0237 + Deleted old checkpoint: checkpoint_epoch_0401 +[MEM @ epoch 404 end] RAM: 21.5/188.4 GiB (11.4%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 405/499 (81% done) --- + [Epoch 405][10/40] loss=0.879390 avg=0.854052 VRAM=38.9GiB | 81.0% done | ETA(epoch): 690s + [Epoch 405][20/40] loss=0.827822 avg=0.874029 VRAM=38.8GiB | 81.1% done | ETA(epoch): 461s + [Epoch 405][30/40] loss=0.841715 avg=0.869146 VRAM=38.9GiB | 81.2% done | ETA(epoch): 230s + [Epoch 405][40/40] loss=0.840623 avg=0.867312 VRAM=38.8GiB | 81.2% done | ETA(epoch): 0s + Train loss: 0.867312 (921.3s) ETA: 1572min + Val loss: 0.802304 [t_0.0-0.2=1.0741 t_0.2-0.4=0.9581 t_0.4-0.6=0.8332 t_0.6-0.8=0.7201 t_0.8-1.0=0.6314] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0405 (BEST) + Deleted old checkpoint: checkpoint_epoch_0402 +[MEM @ epoch 405 end] RAM: 21.5/188.4 GiB (11.4%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 406/499 (81% done) --- + [Epoch 406][10/40] loss=0.793798 avg=0.867815 VRAM=38.9GiB | 81.2% done | ETA(epoch): 691s + [Epoch 406][20/40] loss=0.835627 avg=0.852511 VRAM=38.8GiB | 81.3% done | ETA(epoch): 461s + [Epoch 406][30/40] loss=0.935504 avg=0.861365 VRAM=38.9GiB | 81.3% done | ETA(epoch): 230s + [Epoch 406][40/40] loss=0.884501 avg=0.863058 VRAM=38.8GiB | 81.4% done | ETA(epoch): 0s + Train loss: 0.863058 (921.8s) ETA: 1555min + Val loss: 0.818257 [t_0.0-0.2=1.0703 t_0.2-0.4=0.9659 t_0.4-0.6=0.8365 t_0.6-0.8=0.7182 t_0.8-1.0=0.6274] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0406 + Deleted old checkpoint: checkpoint_epoch_0403 +[MEM @ epoch 406 end] RAM: 21.5/188.4 GiB (11.4%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 407/499 (81% done) --- + [Epoch 407][10/40] loss=0.922909 avg=0.839373 VRAM=38.9GiB | 81.5% done | ETA(epoch): 691s + [Epoch 407][20/40] loss=0.932538 avg=0.856915 VRAM=38.8GiB | 81.5% done | ETA(epoch): 461s + [Epoch 407][30/40] loss=0.824386 avg=0.847079 VRAM=38.9GiB | 81.5% done | ETA(epoch): 230s + [Epoch 407][40/40] loss=0.866712 avg=0.843915 VRAM=38.8GiB | 81.6% done | ETA(epoch): 0s + Train loss: 0.843915 (921.9s) ETA: 1538min + Val loss: 0.798813 [t_0.0-0.2=1.0841 t_0.2-0.4=0.9672 t_0.4-0.6=0.8024 t_0.6-0.8=0.7057 t_0.8-1.0=0.6284] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0407 (BEST) + Deleted old checkpoint: checkpoint_epoch_0404 +[MEM @ epoch 407 end] RAM: 21.5/188.4 GiB (11.4%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 408/499 (82% done) --- + [Epoch 408][10/40] loss=0.761411 avg=0.850538 VRAM=38.9GiB | 81.7% done | ETA(epoch): 691s + [Epoch 408][20/40] loss=0.913800 avg=0.853196 VRAM=38.8GiB | 81.7% done | ETA(epoch): 461s + [Epoch 408][30/40] loss=0.724708 avg=0.849315 VRAM=38.9GiB | 81.8% done | ETA(epoch): 230s + [Epoch 408][40/40] loss=0.962774 avg=0.852895 VRAM=38.8GiB | 81.8% done | ETA(epoch): 0s + Train loss: 0.852895 (922.0s) ETA: 1521min + Val loss: 0.857916 [t_0.0-0.2=1.0551 t_0.2-0.4=0.9950 t_0.4-0.6=0.8622 t_0.6-0.8=0.7272 t_0.8-1.0=0.6203] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0408 + Deleted old checkpoint: checkpoint_epoch_0405 +[MEM @ epoch 408 end] RAM: 21.5/188.4 GiB (11.4%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 409/499 (82% done) --- + [Epoch 409][10/40] loss=0.770101 avg=0.811674 VRAM=38.9GiB | 81.8% done | ETA(epoch): 691s + [Epoch 409][20/40] loss=0.804678 avg=0.825255 VRAM=38.8GiB | 81.9% done | ETA(epoch): 461s + [Epoch 409][30/40] loss=1.005644 avg=0.844859 VRAM=38.9GiB | 82.0% done | ETA(epoch): 230s + [Epoch 409][40/40] loss=0.938113 avg=0.842036 VRAM=38.8GiB | 82.0% done | ETA(epoch): 0s + Train loss: 0.842036 (922.1s) ETA: 1504min + Val loss: 0.891131 [t_0.0-0.2=1.0726 t_0.2-0.4=0.9950 t_0.4-0.6=0.8528 t_0.6-0.8=0.7101 t_0.8-1.0=0.6288] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0409 + Deleted old checkpoint: checkpoint_epoch_0406 +[MEM @ epoch 409 end] RAM: 21.5/188.4 GiB (11.4%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 410/499 (82% done) --- + [Epoch 410][10/40] loss=0.793384 avg=0.859724 VRAM=38.9GiB | 82.0% done | ETA(epoch): 691s + [Epoch 410][20/40] loss=0.788593 avg=0.836989 VRAM=38.8GiB | 82.1% done | ETA(epoch): 461s + [Epoch 410][30/40] loss=0.807183 avg=0.830605 VRAM=38.9GiB | 82.2% done | ETA(epoch): 230s + [Epoch 410][40/40] loss=0.904712 avg=0.834050 VRAM=38.8GiB | 82.2% done | ETA(epoch): 0s + Train loss: 0.834050 (921.8s) ETA: 1488min + Val loss: 0.864125 [t_0.0-0.2=1.0553 t_0.2-0.4=1.0184 t_0.4-0.6=0.8498 t_0.6-0.8=0.7169 t_0.8-1.0=0.6254] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0410 +[MEM @ epoch 410 end] RAM: 21.6/188.4 GiB (11.5%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 411/499 (82% done) --- + [Epoch 411][10/40] loss=0.758695 avg=0.860352 VRAM=38.9GiB | 82.2% done | ETA(epoch): 691s + [Epoch 411][20/40] loss=0.836460 avg=0.844936 VRAM=38.8GiB | 82.3% done | ETA(epoch): 461s + [Epoch 411][30/40] loss=0.977326 avg=0.851666 VRAM=38.9GiB | 82.3% done | ETA(epoch): 230s + [Epoch 411][40/40] loss=0.900057 avg=0.849821 VRAM=38.8GiB | 82.4% done | ETA(epoch): 0s + Train loss: 0.849821 (921.4s) ETA: 1471min + Val loss: 0.862187 [t_0.0-0.2=1.0674 t_0.2-0.4=0.9792 t_0.4-0.6=0.8164 t_0.6-0.8=0.7229 t_0.8-1.0=0.6241] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0411 + Deleted old checkpoint: checkpoint_epoch_0408 +[MEM @ epoch 411 end] RAM: 21.5/188.4 GiB (11.4%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 412/499 (82% done) --- + [Epoch 412][10/40] loss=0.772905 avg=0.846650 VRAM=38.9GiB | 82.5% done | ETA(epoch): 691s + [Epoch 412][20/40] loss=0.797797 avg=0.851710 VRAM=38.8GiB | 82.5% done | ETA(epoch): 461s + [Epoch 412][30/40] loss=0.970681 avg=0.852470 VRAM=38.9GiB | 82.5% done | ETA(epoch): 230s + [Epoch 412][40/40] loss=0.848507 avg=0.859015 VRAM=38.8GiB | 82.6% done | ETA(epoch): 0s + Train loss: 0.859015 (921.4s) ETA: 1454min + Val loss: 0.878490 [t_0.0-0.2=1.0795 t_0.2-0.4=1.0394 t_0.4-0.6=0.8173 t_0.6-0.8=0.6747 t_0.8-1.0=0.6298] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0412 + Deleted old checkpoint: checkpoint_epoch_0409 +[MEM @ epoch 412 end] RAM: 21.5/188.4 GiB (11.4%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 413/499 (83% done) --- + [Epoch 413][10/40] loss=0.733534 avg=0.824830 VRAM=38.9GiB | 82.7% done | ETA(epoch): 691s + [Epoch 413][20/40] loss=0.813946 avg=0.829059 VRAM=38.8GiB | 82.7% done | ETA(epoch): 461s + [Epoch 413][30/40] loss=0.901886 avg=0.831410 VRAM=38.9GiB | 82.8% done | ETA(epoch): 230s + [Epoch 413][40/40] loss=0.730703 avg=0.834852 VRAM=38.8GiB | 82.8% done | ETA(epoch): 0s + Train loss: 0.834852 (921.6s) ETA: 1437min + Val loss: 0.855600 [t_0.0-0.2=1.0628 t_0.2-0.4=0.9856 t_0.4-0.6=0.8741 t_0.6-0.8=0.6910 t_0.8-1.0=0.6335] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0413 + Deleted old checkpoint: checkpoint_epoch_0410 +[MEM @ epoch 413 end] RAM: 21.6/188.4 GiB (11.5%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 414/499 (83% done) --- + [Epoch 414][10/40] loss=0.841370 avg=0.860071 VRAM=38.9GiB | 82.8% done | ETA(epoch): 691s + [Epoch 414][20/40] loss=0.879858 avg=0.840883 VRAM=38.8GiB | 82.9% done | ETA(epoch): 461s + [Epoch 414][30/40] loss=0.785608 avg=0.856070 VRAM=38.9GiB | 83.0% done | ETA(epoch): 230s + [Epoch 414][40/40] loss=0.811423 avg=0.853298 VRAM=38.8GiB | 83.0% done | ETA(epoch): 0s + Train loss: 0.853298 (921.8s) ETA: 1421min + Val loss: 0.842253 [t_0.0-0.2=1.0765 t_0.2-0.4=0.9681 t_0.4-0.6=0.8340 t_0.6-0.8=0.7083 t_0.8-1.0=0.6149] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0414 + Deleted old checkpoint: checkpoint_epoch_0411 +[MEM @ epoch 414 end] RAM: 21.6/188.4 GiB (11.4%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 415/499 (83% done) --- + [Epoch 415][10/40] loss=0.944134 avg=0.855016 VRAM=38.9GiB | 83.0% done | ETA(epoch): 691s + [Epoch 415][20/40] loss=0.845754 avg=0.854878 VRAM=38.8GiB | 83.1% done | ETA(epoch): 460s + [Epoch 415][30/40] loss=0.904378 avg=0.849249 VRAM=38.9GiB | 83.2% done | ETA(epoch): 230s + [Epoch 415][40/40] loss=0.997143 avg=0.850359 VRAM=38.8GiB | 83.2% done | ETA(epoch): 0s + Train loss: 0.850359 (921.2s) ETA: 1404min + Val loss: 0.853649 [t_0.0-0.2=1.0843 t_0.2-0.4=0.9891 t_0.4-0.6=0.8269 t_0.6-0.8=0.6901 t_0.8-1.0=0.6259] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0415 + Deleted old checkpoint: checkpoint_epoch_0412 +[MEM @ epoch 415 end] RAM: 21.6/188.4 GiB (11.4%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 416/499 (83% done) --- + [Epoch 416][10/40] loss=0.793615 avg=0.851133 VRAM=38.9GiB | 83.2% done | ETA(epoch): 691s + [Epoch 416][20/40] loss=0.947723 avg=0.848664 VRAM=38.8GiB | 83.3% done | ETA(epoch): 461s + [Epoch 416][30/40] loss=0.873073 avg=0.855213 VRAM=38.9GiB | 83.4% done | ETA(epoch): 231s + [Epoch 416][40/40] loss=0.921230 avg=0.847258 VRAM=38.8GiB | 83.4% done | ETA(epoch): 0s + Train loss: 0.847258 (922.2s) ETA: 1387min + Val loss: 0.861862 [t_0.0-0.2=1.0757 t_0.2-0.4=0.9988 t_0.4-0.6=0.8217 t_0.6-0.8=0.6987 t_0.8-1.0=0.6471] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0416 + Deleted old checkpoint: checkpoint_epoch_0413 +[MEM @ epoch 416 end] RAM: 21.6/188.4 GiB (11.5%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 417/499 (83% done) --- + [Epoch 417][10/40] loss=0.946042 avg=0.881672 VRAM=38.9GiB | 83.5% done | ETA(epoch): 691s + [Epoch 417][20/40] loss=0.842291 avg=0.873138 VRAM=38.8GiB | 83.5% done | ETA(epoch): 461s + [Epoch 417][30/40] loss=0.785862 avg=0.864322 VRAM=38.9GiB | 83.5% done | ETA(epoch): 230s + [Epoch 417][40/40] loss=0.726357 avg=0.859351 VRAM=38.8GiB | 83.6% done | ETA(epoch): 0s + Train loss: 0.859351 (921.3s) ETA: 1370min + Val loss: 0.850609 [t_0.0-0.2=1.0774 t_0.2-0.4=0.9868 t_0.4-0.6=0.8607 t_0.6-0.8=0.6904 t_0.8-1.0=0.6205] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0417 + Deleted old checkpoint: checkpoint_epoch_0414 +[MEM @ epoch 417 end] RAM: 21.7/188.4 GiB (11.5%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 418/499 (84% done) --- + [Epoch 418][10/40] loss=0.839719 avg=0.822448 VRAM=38.9GiB | 83.7% done | ETA(epoch): 692s + [Epoch 418][20/40] loss=0.793275 avg=0.828357 VRAM=38.8GiB | 83.7% done | ETA(epoch): 461s + [Epoch 418][30/40] loss=0.872562 avg=0.841446 VRAM=38.9GiB | 83.8% done | ETA(epoch): 230s + [Epoch 418][40/40] loss=0.854381 avg=0.843168 VRAM=38.8GiB | 83.8% done | ETA(epoch): 0s + Train loss: 0.843168 (921.8s) ETA: 1354min + Val loss: 0.853250 [t_0.0-0.2=1.0840 t_0.2-0.4=0.9877 t_0.4-0.6=0.8272 t_0.6-0.8=0.7132 t_0.8-1.0=0.6214] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0418 + Deleted old checkpoint: checkpoint_epoch_0415 +[MEM @ epoch 418 end] RAM: 21.6/188.4 GiB (11.4%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 419/499 (84% done) --- + [Epoch 419][10/40] loss=0.795043 avg=0.853494 VRAM=38.9GiB | 83.9% done | ETA(epoch): 692s + [Epoch 419][20/40] loss=0.908123 avg=0.863361 VRAM=38.8GiB | 83.9% done | ETA(epoch): 461s + [Epoch 419][30/40] loss=0.829835 avg=0.858887 VRAM=38.9GiB | 84.0% done | ETA(epoch): 231s + [Epoch 419][40/40] loss=0.957040 avg=0.860353 VRAM=38.8GiB | 84.0% done | ETA(epoch): 0s + Train loss: 0.860353 (922.4s) ETA: 1337min + Val loss: 0.860364 [t_0.0-0.2=1.0719 t_0.2-0.4=0.9831 t_0.4-0.6=0.8777 t_0.6-0.8=0.6956 t_0.8-1.0=0.6385] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0419 + Deleted old checkpoint: checkpoint_epoch_0416 +[MEM @ epoch 419 end] RAM: 21.6/188.4 GiB (11.4%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 420/499 (84% done) --- + [Epoch 420][10/40] loss=0.783880 avg=0.825251 VRAM=38.9GiB | 84.0% done | ETA(epoch): 692s + [Epoch 420][20/40] loss=0.874105 avg=0.842655 VRAM=38.8GiB | 84.1% done | ETA(epoch): 461s + [Epoch 420][30/40] loss=0.835437 avg=0.841202 VRAM=38.9GiB | 84.2% done | ETA(epoch): 231s + [Epoch 420][40/40] loss=0.768846 avg=0.840320 VRAM=38.8GiB | 84.2% done | ETA(epoch): 0s + Train loss: 0.840320 (922.3s) ETA: 1320min + Val loss: 0.867753 [t_0.0-0.2=1.0747 t_0.2-0.4=1.0038 t_0.4-0.6=0.8462 t_0.6-0.8=0.6782 t_0.8-1.0=0.6282] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0420 + Deleted old checkpoint: checkpoint_epoch_0417 +[MEM @ epoch 420 end] RAM: 21.5/188.4 GiB (11.4%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 421/499 (84% done) --- + [Epoch 421][10/40] loss=0.799452 avg=0.829133 VRAM=38.9GiB | 84.2% done | ETA(epoch): 691s + [Epoch 421][20/40] loss=0.790524 avg=0.843490 VRAM=38.8GiB | 84.3% done | ETA(epoch): 461s + [Epoch 421][30/40] loss=0.822583 avg=0.842974 VRAM=38.9GiB | 84.4% done | ETA(epoch): 230s + [Epoch 421][40/40] loss=0.814310 avg=0.840603 VRAM=38.8GiB | 84.4% done | ETA(epoch): 0s + Train loss: 0.840603 (921.6s) ETA: 1303min + Val loss: 0.840560 [t_0.0-0.2=1.0550 t_0.2-0.4=0.9789 t_0.4-0.6=0.8461 t_0.6-0.8=0.6956 t_0.8-1.0=0.6433] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0421 + Deleted old checkpoint: checkpoint_epoch_0418 +[MEM @ epoch 421 end] RAM: 20.8/188.4 GiB (11.1%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 422/499 (84% done) --- + [Epoch 422][10/40] loss=0.893702 avg=0.866205 VRAM=38.9GiB | 84.5% done | ETA(epoch): 691s + [Epoch 422][20/40] loss=0.801258 avg=0.868674 VRAM=38.8GiB | 84.5% done | ETA(epoch): 461s + [Epoch 422][30/40] loss=0.823230 avg=0.865371 VRAM=38.9GiB | 84.5% done | ETA(epoch): 230s + [Epoch 422][40/40] loss=0.775644 avg=0.857461 VRAM=38.8GiB | 84.6% done | ETA(epoch): 0s + Train loss: 0.857461 (921.5s) ETA: 1287min + Val loss: 0.878005 [t_0.0-0.2=1.0666 t_0.2-0.4=0.9982 t_0.4-0.6=0.8469 t_0.6-0.8=0.7335 t_0.8-1.0=0.6347] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0422 + Deleted old checkpoint: checkpoint_epoch_0419 +[MEM @ epoch 422 end] RAM: 20.8/188.4 GiB (11.0%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 423/499 (85% done) --- + [Epoch 423][10/40] loss=0.718979 avg=0.844791 VRAM=38.9GiB | 84.7% done | ETA(epoch): 692s + [Epoch 423][20/40] loss=0.849150 avg=0.842869 VRAM=38.8GiB | 84.7% done | ETA(epoch): 461s + [Epoch 423][30/40] loss=0.932906 avg=0.847346 VRAM=38.9GiB | 84.8% done | ETA(epoch): 231s + [Epoch 423][40/40] loss=0.769571 avg=0.846250 VRAM=38.8GiB | 84.8% done | ETA(epoch): 0s + Train loss: 0.846250 (922.4s) ETA: 1270min + Val loss: 0.835958 [t_0.0-0.2=1.0830 t_0.2-0.4=1.0288 t_0.4-0.6=0.8288 t_0.6-0.8=0.6854 t_0.8-1.0=0.6299] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0423 + Deleted old checkpoint: checkpoint_epoch_0420 +[MEM @ epoch 423 end] RAM: 20.9/188.4 GiB (11.1%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 424/499 (85% done) --- + [Epoch 424][10/40] loss=0.799685 avg=0.833384 VRAM=38.9GiB | 84.9% done | ETA(epoch): 691s + [Epoch 424][20/40] loss=0.768880 avg=0.843096 VRAM=38.8GiB | 84.9% done | ETA(epoch): 461s + [Epoch 424][30/40] loss=0.827324 avg=0.839489 VRAM=38.9GiB | 85.0% done | ETA(epoch): 230s + [Epoch 424][40/40] loss=0.884445 avg=0.840111 VRAM=38.8GiB | 85.0% done | ETA(epoch): 0s + Train loss: 0.840111 (921.5s) ETA: 1253min + Val loss: 0.849124 [t_0.0-0.2=1.0805 t_0.2-0.4=0.9785 t_0.4-0.6=0.8152 t_0.6-0.8=0.7248 t_0.8-1.0=0.6283] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0424 + Deleted old checkpoint: checkpoint_epoch_0421 +[MEM @ epoch 424 end] RAM: 20.9/188.4 GiB (11.1%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 425/499 (85% done) --- + [Epoch 425][10/40] loss=0.813036 avg=0.859686 VRAM=38.9GiB | 85.0% done | ETA(epoch): 691s + [Epoch 425][20/40] loss=0.771404 avg=0.847472 VRAM=38.8GiB | 85.1% done | ETA(epoch): 461s + [Epoch 425][30/40] loss=0.828492 avg=0.850749 VRAM=38.9GiB | 85.2% done | ETA(epoch): 230s + [Epoch 425][40/40] loss=0.825443 avg=0.837314 VRAM=38.8GiB | 85.2% done | ETA(epoch): 0s + Train loss: 0.837314 (921.8s) ETA: 1236min + Val loss: 0.862038 [t_0.0-0.2=1.0836 t_0.2-0.4=0.9963 t_0.4-0.6=0.8533 t_0.6-0.8=0.7013 t_0.8-1.0=0.6328] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0425 + Deleted old checkpoint: checkpoint_epoch_0422 +[MEM @ epoch 425 end] RAM: 20.8/188.4 GiB (11.1%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 426/499 (85% done) --- + [Epoch 426][10/40] loss=0.863812 avg=0.843500 VRAM=38.9GiB | 85.2% done | ETA(epoch): 691s + [Epoch 426][20/40] loss=0.878389 avg=0.849527 VRAM=38.8GiB | 85.3% done | ETA(epoch): 460s + [Epoch 426][30/40] loss=0.868467 avg=0.853199 VRAM=38.9GiB | 85.4% done | ETA(epoch): 230s + [Epoch 426][40/40] loss=0.816376 avg=0.858157 VRAM=38.8GiB | 85.4% done | ETA(epoch): 0s + Train loss: 0.858157 (921.3s) ETA: 1220min + Val loss: 0.856411 [t_0.0-0.2=1.0719 t_0.2-0.4=1.0201 t_0.4-0.6=0.8376 t_0.6-0.8=0.7339 t_0.8-1.0=0.6286] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0426 + Deleted old checkpoint: checkpoint_epoch_0423 +[MEM @ epoch 426 end] RAM: 21.5/188.4 GiB (11.4%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 427/499 (85% done) --- + [Epoch 427][10/40] loss=0.839369 avg=0.813168 VRAM=38.9GiB | 85.5% done | ETA(epoch): 691s + [Epoch 427][20/40] loss=0.890570 avg=0.828550 VRAM=38.8GiB | 85.5% done | ETA(epoch): 461s + [Epoch 427][30/40] loss=0.778273 avg=0.822508 VRAM=38.9GiB | 85.5% done | ETA(epoch): 230s + [Epoch 427][40/40] loss=0.862579 avg=0.839905 VRAM=38.8GiB | 85.6% done | ETA(epoch): 0s + Train loss: 0.839905 (921.8s) ETA: 1203min + Val loss: 0.854272 [t_0.0-0.2=1.0733 t_0.2-0.4=1.0229 t_0.4-0.6=0.8333 t_0.6-0.8=0.7081 t_0.8-1.0=0.6181] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0427 + Deleted old checkpoint: checkpoint_epoch_0424 +[MEM @ epoch 427 end] RAM: 21.6/188.4 GiB (11.5%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 428/499 (86% done) --- + [Epoch 428][10/40] loss=0.877376 avg=0.835418 VRAM=38.9GiB | 85.7% done | ETA(epoch): 691s + [Epoch 428][20/40] loss=0.857319 avg=0.835700 VRAM=38.8GiB | 85.7% done | ETA(epoch): 461s + [Epoch 428][30/40] loss=0.788980 avg=0.846165 VRAM=38.9GiB | 85.8% done | ETA(epoch): 230s + [Epoch 428][40/40] loss=0.958820 avg=0.846708 VRAM=38.8GiB | 85.8% done | ETA(epoch): 0s + Train loss: 0.846708 (921.3s) ETA: 1186min + Val loss: 0.864414 [t_0.0-0.2=1.0654 t_0.2-0.4=1.0026 t_0.4-0.6=0.8473 t_0.6-0.8=0.7165 t_0.8-1.0=0.6277] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0428 + Deleted old checkpoint: checkpoint_epoch_0425 +[MEM @ epoch 428 end] RAM: 21.5/188.4 GiB (11.4%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 429/499 (86% done) --- + [Epoch 429][10/40] loss=0.780783 avg=0.847601 VRAM=38.9GiB | 85.9% done | ETA(epoch): 690s + [Epoch 429][20/40] loss=0.865153 avg=0.850664 VRAM=38.8GiB | 85.9% done | ETA(epoch): 460s + [Epoch 429][30/40] loss=0.840475 avg=0.849308 VRAM=38.9GiB | 86.0% done | ETA(epoch): 230s + [Epoch 429][40/40] loss=0.783926 avg=0.832277 VRAM=38.8GiB | 86.0% done | ETA(epoch): 0s + Train loss: 0.832277 (921.2s) ETA: 1169min + Val loss: 0.859450 [t_0.0-0.2=1.0651 t_0.2-0.4=0.9914 t_0.4-0.6=0.8391 t_0.6-0.8=0.7270 t_0.8-1.0=0.6347] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0429 + Deleted old checkpoint: checkpoint_epoch_0426 +[MEM @ epoch 429 end] RAM: 21.5/188.4 GiB (11.4%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 430/499 (86% done) --- + [Epoch 430][10/40] loss=0.847530 avg=0.849522 VRAM=38.9GiB | 86.1% done | ETA(epoch): 691s + [Epoch 430][20/40] loss=0.795177 avg=0.840613 VRAM=38.8GiB | 86.1% done | ETA(epoch): 461s + [Epoch 430][30/40] loss=0.852207 avg=0.848423 VRAM=38.9GiB | 86.2% done | ETA(epoch): 230s + [Epoch 430][40/40] loss=0.874950 avg=0.849366 VRAM=38.8GiB | 86.2% done | ETA(epoch): 0s + Train loss: 0.849366 (921.4s) ETA: 1153min + Val loss: 0.858183 [t_0.0-0.2=1.0745 t_0.2-0.4=1.0022 t_0.4-0.6=0.8277 t_0.6-0.8=0.6981 t_0.8-1.0=0.6245] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0430 + Deleted old checkpoint: checkpoint_epoch_0427 +[MEM @ epoch 430 end] RAM: 21.5/188.4 GiB (11.4%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 431/499 (86% done) --- + [Epoch 431][10/40] loss=0.864258 avg=0.857484 VRAM=38.9GiB | 86.2% done | ETA(epoch): 691s + [Epoch 431][20/40] loss=0.791487 avg=0.869706 VRAM=38.8GiB | 86.3% done | ETA(epoch): 460s + [Epoch 431][30/40] loss=0.892421 avg=0.860004 VRAM=38.9GiB | 86.4% done | ETA(epoch): 230s + [Epoch 431][40/40] loss=0.834876 avg=0.854175 VRAM=38.8GiB | 86.4% done | ETA(epoch): 0s + Train loss: 0.854175 (921.1s) ETA: 1136min + Val loss: 0.822408 [t_0.0-0.2=1.0705 t_0.2-0.4=1.0016 t_0.4-0.6=0.8278 t_0.6-0.8=0.6976 t_0.8-1.0=0.6295] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0431 + Deleted old checkpoint: checkpoint_epoch_0428 +[MEM @ epoch 431 end] RAM: 21.6/188.4 GiB (11.5%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 432/499 (86% done) --- + [Epoch 432][10/40] loss=0.749163 avg=0.872609 VRAM=38.9GiB | 86.5% done | ETA(epoch): 691s + [Epoch 432][20/40] loss=0.861240 avg=0.860507 VRAM=38.8GiB | 86.5% done | ETA(epoch): 461s + [Epoch 432][30/40] loss=0.944561 avg=0.873632 VRAM=38.9GiB | 86.6% done | ETA(epoch): 230s + [Epoch 432][40/40] loss=0.886913 avg=0.866458 VRAM=38.8GiB | 86.6% done | ETA(epoch): 0s + Train loss: 0.866458 (920.9s) ETA: 1119min + Val loss: 0.857383 [t_0.0-0.2=1.0751 t_0.2-0.4=0.9332 t_0.4-0.6=0.8675 t_0.6-0.8=0.7145 t_0.8-1.0=0.6240] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0432 + Deleted old checkpoint: checkpoint_epoch_0429 +[MEM @ epoch 432 end] RAM: 21.6/188.4 GiB (11.5%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 433/499 (87% done) --- + [Epoch 433][10/40] loss=0.852106 avg=0.872071 VRAM=38.9GiB | 86.7% done | ETA(epoch): 691s + [Epoch 433][20/40] loss=0.845684 avg=0.880614 VRAM=38.8GiB | 86.7% done | ETA(epoch): 461s + [Epoch 433][30/40] loss=0.747732 avg=0.869107 VRAM=38.9GiB | 86.8% done | ETA(epoch): 230s + [Epoch 433][40/40] loss=0.906207 avg=0.857809 VRAM=38.8GiB | 86.8% done | ETA(epoch): 0s + Train loss: 0.857809 (921.4s) ETA: 1102min + Val loss: 0.804988 [t_0.0-0.2=1.0809 t_0.2-0.4=0.9921 t_0.4-0.6=0.8115 t_0.6-0.8=0.7139 t_0.8-1.0=0.6307] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0433 + Deleted old checkpoint: checkpoint_epoch_0430 +[MEM @ epoch 433 end] RAM: 21.6/188.4 GiB (11.4%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 434/499 (87% done) --- + [Epoch 434][10/40] loss=0.873753 avg=0.854670 VRAM=38.9GiB | 86.9% done | ETA(epoch): 691s + [Epoch 434][20/40] loss=0.780816 avg=0.861479 VRAM=38.8GiB | 86.9% done | ETA(epoch): 461s + [Epoch 434][30/40] loss=0.810345 avg=0.862431 VRAM=38.9GiB | 87.0% done | ETA(epoch): 230s + [Epoch 434][40/40] loss=0.854139 avg=0.864062 VRAM=38.8GiB | 87.0% done | ETA(epoch): 0s + Train loss: 0.864062 (920.8s) ETA: 1086min + Val loss: 0.841858 [t_0.0-0.2=1.0752 t_0.2-0.4=1.0018 t_0.4-0.6=0.8467 t_0.6-0.8=0.7196 t_0.8-1.0=0.6377] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0434 + Deleted old checkpoint: checkpoint_epoch_0431 +[MEM @ epoch 434 end] RAM: 21.5/188.4 GiB (11.4%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 435/499 (87% done) --- + [Epoch 435][10/40] loss=0.887664 avg=0.843483 VRAM=38.9GiB | 87.1% done | ETA(epoch): 690s + [Epoch 435][20/40] loss=0.735805 avg=0.857224 VRAM=38.8GiB | 87.1% done | ETA(epoch): 460s + [Epoch 435][30/40] loss=0.797535 avg=0.865853 VRAM=38.9GiB | 87.2% done | ETA(epoch): 230s + [Epoch 435][40/40] loss=0.795611 avg=0.858999 VRAM=38.8GiB | 87.2% done | ETA(epoch): 0s + Train loss: 0.858999 (920.6s) ETA: 1069min + Val loss: 0.806510 [t_0.0-0.2=1.0631 t_0.2-0.4=0.9896 t_0.4-0.6=0.8452 t_0.6-0.8=0.6966 t_0.8-1.0=0.6250] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0435 + Deleted old checkpoint: checkpoint_epoch_0432 +[MEM @ epoch 435 end] RAM: 21.5/188.4 GiB (11.4%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 436/499 (87% done) --- + [Epoch 436][10/40] loss=0.771050 avg=0.828080 VRAM=38.9GiB | 87.2% done | ETA(epoch): 690s + [Epoch 436][20/40] loss=0.760067 avg=0.825599 VRAM=38.8GiB | 87.3% done | ETA(epoch): 460s + [Epoch 436][30/40] loss=0.796290 avg=0.835132 VRAM=38.9GiB | 87.4% done | ETA(epoch): 230s + [Epoch 436][40/40] loss=0.929606 avg=0.841564 VRAM=38.8GiB | 87.4% done | ETA(epoch): 0s + Train loss: 0.841564 (919.9s) ETA: 1052min + Val loss: 0.836966 [t_0.0-0.2=1.0860 t_0.2-0.4=0.9843 t_0.4-0.6=0.8238 t_0.6-0.8=0.6909 t_0.8-1.0=0.6302] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0436 + Deleted old checkpoint: checkpoint_epoch_0433 +[MEM @ epoch 436 end] RAM: 21.6/188.4 GiB (11.5%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 437/499 (87% done) --- + [Epoch 437][10/40] loss=0.710504 avg=0.852574 VRAM=38.9GiB | 87.5% done | ETA(epoch): 690s + [Epoch 437][20/40] loss=0.834846 avg=0.860101 VRAM=38.8GiB | 87.5% done | ETA(epoch): 460s + [Epoch 437][30/40] loss=0.849485 avg=0.860844 VRAM=38.9GiB | 87.5% done | ETA(epoch): 230s + [Epoch 437][40/40] loss=0.999086 avg=0.866744 VRAM=38.8GiB | 87.6% done | ETA(epoch): 0s + Train loss: 0.866744 (921.0s) ETA: 1035min + Val loss: 0.844114 [t_0.0-0.2=1.0685 t_0.2-0.4=0.9963 t_0.4-0.6=0.8359 t_0.6-0.8=0.7035 t_0.8-1.0=0.6178] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0437 + Deleted old checkpoint: checkpoint_epoch_0434 +[MEM @ epoch 437 end] RAM: 21.6/188.4 GiB (11.5%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 438/499 (88% done) --- + [Epoch 438][10/40] loss=0.808432 avg=0.812325 VRAM=38.9GiB | 87.6% done | ETA(epoch): 691s + [Epoch 438][20/40] loss=0.830198 avg=0.828152 VRAM=38.8GiB | 87.7% done | ETA(epoch): 461s + [Epoch 438][30/40] loss=0.919060 avg=0.837750 VRAM=38.9GiB | 87.8% done | ETA(epoch): 230s + [Epoch 438][40/40] loss=0.822654 avg=0.851200 VRAM=38.8GiB | 87.8% done | ETA(epoch): 0s + Train loss: 0.851200 (921.2s) ETA: 1019min + Val loss: 0.834935 [t_0.0-0.2=1.0758 t_0.2-0.4=1.0224 t_0.4-0.6=0.8204 t_0.6-0.8=0.7056 t_0.8-1.0=0.6335] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0438 + Deleted old checkpoint: checkpoint_epoch_0435 +[MEM @ epoch 438 end] RAM: 21.5/188.4 GiB (11.4%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 439/499 (88% done) --- + [Epoch 439][10/40] loss=0.726355 avg=0.838708 VRAM=38.9GiB | 87.8% done | ETA(epoch): 691s + [Epoch 439][20/40] loss=0.762031 avg=0.844948 VRAM=38.8GiB | 87.9% done | ETA(epoch): 461s + [Epoch 439][30/40] loss=0.895626 avg=0.851147 VRAM=38.9GiB | 87.9% done | ETA(epoch): 230s + [Epoch 439][40/40] loss=0.844020 avg=0.852386 VRAM=38.8GiB | 88.0% done | ETA(epoch): 0s + Train loss: 0.852386 (921.6s) ETA: 1002min + Val loss: 0.892233 [t_0.0-0.2=1.0638 t_0.2-0.4=1.0041 t_0.4-0.6=0.8210 t_0.6-0.8=0.7276 t_0.8-1.0=0.6063] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0439 + Deleted old checkpoint: checkpoint_epoch_0436 +[MEM @ epoch 439 end] RAM: 21.6/188.4 GiB (11.5%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 440/499 (88% done) --- + [Epoch 440][10/40] loss=0.776262 avg=0.820756 VRAM=38.9GiB | 88.0% done | ETA(epoch): 690s + [Epoch 440][20/40] loss=0.793563 avg=0.832107 VRAM=38.8GiB | 88.1% done | ETA(epoch): 460s + [Epoch 440][30/40] loss=0.783578 avg=0.840061 VRAM=38.9GiB | 88.1% done | ETA(epoch): 230s + [Epoch 440][40/40] loss=0.761167 avg=0.841478 VRAM=38.8GiB | 88.2% done | ETA(epoch): 0s + Train loss: 0.841478 (921.1s) ETA: 985min + Val loss: 0.865819 [t_0.0-0.2=1.0590 t_0.2-0.4=1.0000 t_0.4-0.6=0.8165 t_0.6-0.8=0.7269 t_0.8-1.0=0.6437] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0440 + Deleted old checkpoint: checkpoint_epoch_0437 +[MEM @ epoch 440 end] RAM: 21.6/188.4 GiB (11.4%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 441/499 (88% done) --- + [Epoch 441][10/40] loss=1.007018 avg=0.908053 VRAM=38.9GiB | 88.2% done | ETA(epoch): 691s + [Epoch 441][20/40] loss=0.755156 avg=0.906379 VRAM=38.8GiB | 88.3% done | ETA(epoch): 461s + [Epoch 441][30/40] loss=0.891722 avg=0.900561 VRAM=38.9GiB | 88.3% done | ETA(epoch): 231s + [Epoch 441][40/40] loss=0.792579 avg=0.886098 VRAM=38.8GiB | 88.4% done | ETA(epoch): 0s + Train loss: 0.886098 (921.9s) ETA: 968min + Val loss: 0.844957 [t_0.0-0.2=1.0756 t_0.2-0.4=1.0160 t_0.4-0.6=0.8493 t_0.6-0.8=0.7084 t_0.8-1.0=0.6214] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0441 + Deleted old checkpoint: checkpoint_epoch_0438 +[MEM @ epoch 441 end] RAM: 21.6/188.4 GiB (11.4%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 442/499 (88% done) --- + [Epoch 442][10/40] loss=0.794344 avg=0.839570 VRAM=38.9GiB | 88.4% done | ETA(epoch): 690s + [Epoch 442][20/40] loss=0.949237 avg=0.841023 VRAM=38.8GiB | 88.5% done | ETA(epoch): 460s + [Epoch 442][30/40] loss=0.901336 avg=0.848629 VRAM=38.9GiB | 88.5% done | ETA(epoch): 230s + [Epoch 442][40/40] loss=0.924473 avg=0.848980 VRAM=38.8GiB | 88.6% done | ETA(epoch): 0s + Train loss: 0.848980 (921.0s) ETA: 952min + Val loss: 0.868660 [t_0.0-0.2=1.0588 t_0.2-0.4=0.9771 t_0.4-0.6=0.8473 t_0.6-0.8=0.7220 t_0.8-1.0=0.6239] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0442 + Deleted old checkpoint: checkpoint_epoch_0439 +[MEM @ epoch 442 end] RAM: 21.5/188.4 GiB (11.4%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 443/499 (89% done) --- + [Epoch 443][10/40] loss=0.839592 avg=0.845989 VRAM=38.9GiB | 88.6% done | ETA(epoch): 691s + [Epoch 443][20/40] loss=0.877772 avg=0.877938 VRAM=38.8GiB | 88.7% done | ETA(epoch): 461s + [Epoch 443][30/40] loss=0.860750 avg=0.872243 VRAM=38.9GiB | 88.8% done | ETA(epoch): 230s + [Epoch 443][40/40] loss=0.795193 avg=0.857953 VRAM=38.8GiB | 88.8% done | ETA(epoch): 0s + Train loss: 0.857953 (921.4s) ETA: 935min + Val loss: 0.845371 [t_0.0-0.2=1.0695 t_0.2-0.4=0.9975 t_0.4-0.6=0.8205 t_0.6-0.8=0.7125 t_0.8-1.0=0.6282] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0443 + Deleted old checkpoint: checkpoint_epoch_0440 +[MEM @ epoch 443 end] RAM: 21.6/188.4 GiB (11.5%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 444/499 (89% done) --- + [Epoch 444][10/40] loss=0.909768 avg=0.859195 VRAM=38.9GiB | 88.8% done | ETA(epoch): 690s + [Epoch 444][20/40] loss=0.848242 avg=0.848247 VRAM=38.8GiB | 88.9% done | ETA(epoch): 460s + [Epoch 444][30/40] loss=0.926496 avg=0.860581 VRAM=38.9GiB | 88.9% done | ETA(epoch): 230s + [Epoch 444][40/40] loss=0.805803 avg=0.860673 VRAM=38.8GiB | 89.0% done | ETA(epoch): 0s + Train loss: 0.860673 (921.0s) ETA: 918min + Val loss: 0.858436 [t_0.0-0.2=1.0753 t_0.2-0.4=1.0071 t_0.4-0.6=0.8175 t_0.6-0.8=0.6970 t_0.8-1.0=0.6368] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0444 + Deleted old checkpoint: checkpoint_epoch_0441 +[MEM @ epoch 444 end] RAM: 21.6/188.4 GiB (11.4%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 445/499 (89% done) --- + [Epoch 445][10/40] loss=0.904297 avg=0.859111 VRAM=38.9GiB | 89.0% done | ETA(epoch): 690s + [Epoch 445][20/40] loss=0.958827 avg=0.871631 VRAM=38.8GiB | 89.1% done | ETA(epoch): 460s + [Epoch 445][30/40] loss=0.887926 avg=0.869368 VRAM=38.9GiB | 89.1% done | ETA(epoch): 230s + [Epoch 445][40/40] loss=0.871678 avg=0.859351 VRAM=38.8GiB | 89.2% done | ETA(epoch): 0s + Train loss: 0.859351 (921.2s) ETA: 902min + Val loss: 0.854122 [t_0.0-0.2=1.0613 t_0.2-0.4=0.9869 t_0.4-0.6=0.8567 t_0.6-0.8=0.6987 t_0.8-1.0=0.6368] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0445 + Deleted old checkpoint: checkpoint_epoch_0442 +[MEM @ epoch 445 end] RAM: 21.6/188.4 GiB (11.4%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 446/499 (89% done) --- + [Epoch 446][10/40] loss=0.855439 avg=0.859995 VRAM=38.9GiB | 89.2% done | ETA(epoch): 691s + [Epoch 446][20/40] loss=0.758668 avg=0.875619 VRAM=38.8GiB | 89.3% done | ETA(epoch): 461s + [Epoch 446][30/40] loss=0.892653 avg=0.869663 VRAM=38.9GiB | 89.3% done | ETA(epoch): 230s + [Epoch 446][40/40] loss=0.900326 avg=0.863018 VRAM=38.8GiB | 89.4% done | ETA(epoch): 0s + Train loss: 0.863018 (921.6s) ETA: 885min + Val loss: 0.847146 [t_0.0-0.2=1.0675 t_0.2-0.4=0.9976 t_0.4-0.6=0.8693 t_0.6-0.8=0.6900 t_0.8-1.0=0.6266] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0446 + Deleted old checkpoint: checkpoint_epoch_0443 +[MEM @ epoch 446 end] RAM: 21.6/188.4 GiB (11.4%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 447/499 (89% done) --- + [Epoch 447][10/40] loss=0.884531 avg=0.881193 VRAM=38.9GiB | 89.5% done | ETA(epoch): 691s + [Epoch 447][20/40] loss=0.891388 avg=0.881956 VRAM=38.8GiB | 89.5% done | ETA(epoch): 461s + [Epoch 447][30/40] loss=0.783491 avg=0.871353 VRAM=38.9GiB | 89.5% done | ETA(epoch): 230s + [Epoch 447][40/40] loss=0.779471 avg=0.866951 VRAM=38.8GiB | 89.6% done | ETA(epoch): 0s + Train loss: 0.866951 (921.7s) ETA: 868min + Val loss: 0.844401 [t_0.0-0.2=1.0663 t_0.2-0.4=1.0018 t_0.4-0.6=0.8469 t_0.6-0.8=0.7193 t_0.8-1.0=0.6220] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0447 + Deleted old checkpoint: checkpoint_epoch_0444 +[MEM @ epoch 447 end] RAM: 21.6/188.4 GiB (11.5%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 448/499 (90% done) --- + [Epoch 448][10/40] loss=0.963651 avg=0.852931 VRAM=38.9GiB | 89.6% done | ETA(epoch): 691s + [Epoch 448][20/40] loss=0.874944 avg=0.856398 VRAM=38.8GiB | 89.7% done | ETA(epoch): 461s + [Epoch 448][30/40] loss=0.723339 avg=0.840116 VRAM=38.9GiB | 89.8% done | ETA(epoch): 230s + [Epoch 448][40/40] loss=0.897905 avg=0.841132 VRAM=38.8GiB | 89.8% done | ETA(epoch): 0s + Train loss: 0.841132 (921.4s) ETA: 851min + Val loss: 0.836766 [t_0.0-0.2=1.0640 t_0.2-0.4=1.0193 t_0.4-0.6=0.8469 t_0.6-0.8=0.7054 t_0.8-1.0=0.6320] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0448 + Deleted old checkpoint: checkpoint_epoch_0445 +[MEM @ epoch 448 end] RAM: 21.6/188.4 GiB (11.4%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 449/499 (90% done) --- + [Epoch 449][10/40] loss=0.807115 avg=0.836807 VRAM=38.9GiB | 89.8% done | ETA(epoch): 691s + [Epoch 449][20/40] loss=0.778587 avg=0.823691 VRAM=38.8GiB | 89.9% done | ETA(epoch): 461s + [Epoch 449][30/40] loss=0.827400 avg=0.829552 VRAM=38.9GiB | 90.0% done | ETA(epoch): 230s + [Epoch 449][40/40] loss=0.841546 avg=0.834667 VRAM=38.8GiB | 90.0% done | ETA(epoch): 0s + [MilestoneVis] train_0 step 17999 ✓ + [MilestoneGrid] train_0 → output/lora_rank_128_mlp_H200/milestone_vis/milestone_grid_train_0.png + [MilestoneVis] train_1 step 17999 ✓ + [MilestoneGrid] train_1 → output/lora_rank_128_mlp_H200/milestone_vis/milestone_grid_train_1.png + [MilestoneVis] val_0 step 17999 ✓ + [MilestoneGrid] val_0 → output/lora_rank_128_mlp_H200/milestone_vis/milestone_grid_val_0.png + [MilestoneVis] val_1 step 17999 ✓ + [MilestoneGrid] val_1 → output/lora_rank_128_mlp_H200/milestone_vis/milestone_grid_val_1.png + [MilestoneVis] test_0 step 17999 ✓ + [MilestoneGrid] test_0 → output/lora_rank_128_mlp_H200/milestone_vis/milestone_grid_test_0.png + [MilestoneVis] test_1 step 17999 ✓ + [MilestoneGrid] test_1 → output/lora_rank_128_mlp_H200/milestone_vis/milestone_grid_test_1.png + [MilestoneVis] test_2 step 17999 ✓ + [MilestoneGrid] test_2 → output/lora_rank_128_mlp_H200/milestone_vis/milestone_grid_test_2.png + [MilestoneVis] test_3 step 17999 ✓ + [MilestoneGrid] test_3 → output/lora_rank_128_mlp_H200/milestone_vis/milestone_grid_test_3.png + [MilestoneVis] test_4 step 17999 ✓ + [MilestoneGrid] test_4 → output/lora_rank_128_mlp_H200/milestone_vis/milestone_grid_test_4.png + [MilestoneVis] test_5 step 17999 ✓ + [MilestoneGrid] test_5 → output/lora_rank_128_mlp_H200/milestone_vis/milestone_grid_test_5.png + Train loss: 0.834667 (1677.3s) ETA: 836min + Val loss: 0.853316 [t_0.0-0.2=1.0631 t_0.2-0.4=0.9953 t_0.4-0.6=0.8383 t_0.6-0.8=0.7188 t_0.8-1.0=0.6211] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0449 + Deleted old checkpoint: checkpoint_epoch_0446 +[MEM @ epoch 449 end] RAM: 22.2/188.4 GiB (11.8%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 450/499 (90% done) --- + [Epoch 450][10/40] loss=0.819643 avg=0.832422 VRAM=38.9GiB | 90.0% done | ETA(epoch): 691s + [Epoch 450][20/40] loss=0.872540 avg=0.855199 VRAM=38.8GiB | 90.1% done | ETA(epoch): 460s + [Epoch 450][30/40] loss=0.876255 avg=0.860516 VRAM=38.9GiB | 90.1% done | ETA(epoch): 230s + [Epoch 450][40/40] loss=0.839626 avg=0.860439 VRAM=38.8GiB | 90.2% done | ETA(epoch): 0s + Train loss: 0.860439 (920.8s) ETA: 819min + Val loss: 0.841530 [t_0.0-0.2=1.0692 t_0.2-0.4=0.9585 t_0.4-0.6=0.8642 t_0.6-0.8=0.6812 t_0.8-1.0=0.6408] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0450 + Deleted old checkpoint: checkpoint_epoch_0447 +[MEM @ epoch 450 end] RAM: 22.2/188.4 GiB (11.8%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 451/499 (90% done) --- + [Epoch 451][10/40] loss=0.754876 avg=0.862447 VRAM=38.9GiB | 90.2% done | ETA(epoch): 690s + [Epoch 451][20/40] loss=0.848740 avg=0.850115 VRAM=38.8GiB | 90.3% done | ETA(epoch): 460s + [Epoch 451][30/40] loss=0.861721 avg=0.848317 VRAM=38.9GiB | 90.3% done | ETA(epoch): 230s + [Epoch 451][40/40] loss=0.898189 avg=0.853509 VRAM=38.8GiB | 90.4% done | ETA(epoch): 0s + Train loss: 0.853509 (920.7s) ETA: 803min + Val loss: 0.826329 [t_0.0-0.2=1.0656 t_0.2-0.4=0.9965 t_0.4-0.6=0.8507 t_0.6-0.8=0.7033 t_0.8-1.0=0.6298] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0451 + Deleted old checkpoint: checkpoint_epoch_0448 +[MEM @ epoch 451 end] RAM: 22.2/188.4 GiB (11.8%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 452/499 (90% done) --- + [Epoch 452][10/40] loss=0.778096 avg=0.847596 VRAM=38.9GiB | 90.5% done | ETA(epoch): 691s + [Epoch 452][20/40] loss=0.854434 avg=0.863128 VRAM=38.8GiB | 90.5% done | ETA(epoch): 460s + [Epoch 452][30/40] loss=0.839059 avg=0.868754 VRAM=38.9GiB | 90.5% done | ETA(epoch): 230s + [Epoch 452][40/40] loss=0.866810 avg=0.869687 VRAM=38.8GiB | 90.6% done | ETA(epoch): 0s + Train loss: 0.869687 (921.1s) ETA: 786min + Val loss: 0.837587 [t_0.0-0.2=1.0594 t_0.2-0.4=0.9628 t_0.4-0.6=0.8433 t_0.6-0.8=0.7290 t_0.8-1.0=0.6211] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0452 + Deleted old checkpoint: checkpoint_epoch_0449 +[MEM @ epoch 452 end] RAM: 22.2/188.4 GiB (11.8%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 453/499 (91% done) --- + [Epoch 453][10/40] loss=0.901554 avg=0.848510 VRAM=38.9GiB | 90.6% done | ETA(epoch): 691s + [Epoch 453][20/40] loss=0.896164 avg=0.852080 VRAM=38.8GiB | 90.7% done | ETA(epoch): 461s + [Epoch 453][30/40] loss=0.835157 avg=0.860346 VRAM=38.9GiB | 90.8% done | ETA(epoch): 230s + [Epoch 453][40/40] loss=0.910956 avg=0.860935 VRAM=38.8GiB | 90.8% done | ETA(epoch): 0s + Train loss: 0.860935 (921.2s) ETA: 769min + Val loss: 0.854021 [t_0.0-0.2=1.0649 t_0.2-0.4=0.9406 t_0.4-0.6=0.8520 t_0.6-0.8=0.7265 t_0.8-1.0=0.6374] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0453 + Deleted old checkpoint: checkpoint_epoch_0450 +[MEM @ epoch 453 end] RAM: 22.3/188.4 GiB (11.8%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 454/499 (91% done) --- + [Epoch 454][10/40] loss=0.840898 avg=0.847247 VRAM=38.9GiB | 90.8% done | ETA(epoch): 691s + [Epoch 454][20/40] loss=0.924893 avg=0.855550 VRAM=38.8GiB | 90.9% done | ETA(epoch): 460s + [Epoch 454][30/40] loss=0.800663 avg=0.844243 VRAM=38.9GiB | 91.0% done | ETA(epoch): 230s + [Epoch 454][40/40] loss=0.807875 avg=0.845055 VRAM=38.8GiB | 91.0% done | ETA(epoch): 0s + Train loss: 0.845055 (921.2s) ETA: 752min + Val loss: 0.868269 [t_0.0-0.2=1.0774 t_0.2-0.4=0.9632 t_0.4-0.6=0.8546 t_0.6-0.8=0.7096 t_0.8-1.0=0.6323] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0454 + Deleted old checkpoint: checkpoint_epoch_0451 +[MEM @ epoch 454 end] RAM: 22.5/188.4 GiB (12.0%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 455/499 (91% done) --- + [Epoch 455][10/40] loss=0.868696 avg=0.869878 VRAM=38.9GiB | 91.0% done | ETA(epoch): 690s + [Epoch 455][20/40] loss=0.802299 avg=0.846278 VRAM=38.8GiB | 91.1% done | ETA(epoch): 461s + [Epoch 455][30/40] loss=0.795825 avg=0.842278 VRAM=38.9GiB | 91.1% done | ETA(epoch): 230s + [Epoch 455][40/40] loss=0.820718 avg=0.841850 VRAM=38.8GiB | 91.2% done | ETA(epoch): 0s + Train loss: 0.841850 (921.0s) ETA: 736min + Val loss: 0.825872 [t_0.0-0.2=1.0496 t_0.2-0.4=1.0151 t_0.4-0.6=0.8367 t_0.6-0.8=0.7097 t_0.8-1.0=0.6356] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0455 + Deleted old checkpoint: checkpoint_epoch_0452 +[MEM @ epoch 455 end] RAM: 22.6/188.4 GiB (12.0%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 456/499 (91% done) --- + [Epoch 456][10/40] loss=0.822332 avg=0.865044 VRAM=38.9GiB | 91.2% done | ETA(epoch): 690s + [Epoch 456][20/40] loss=0.857731 avg=0.849923 VRAM=38.8GiB | 91.3% done | ETA(epoch): 460s + [Epoch 456][30/40] loss=0.898788 avg=0.854419 VRAM=38.9GiB | 91.3% done | ETA(epoch): 230s + [Epoch 456][40/40] loss=0.940595 avg=0.849238 VRAM=38.8GiB | 91.4% done | ETA(epoch): 0s + Train loss: 0.849238 (921.1s) ETA: 719min + Val loss: 0.868466 [t_0.0-0.2=1.0758 t_0.2-0.4=0.9893 t_0.4-0.6=0.8765 t_0.6-0.8=0.7269 t_0.8-1.0=0.6323] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0456 + Deleted old checkpoint: checkpoint_epoch_0453 +[MEM @ epoch 456 end] RAM: 22.7/188.4 GiB (12.0%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 457/499 (91% done) --- + [Epoch 457][10/40] loss=0.910117 avg=0.868363 VRAM=38.9GiB | 91.5% done | ETA(epoch): 691s + [Epoch 457][20/40] loss=0.877539 avg=0.859232 VRAM=38.8GiB | 91.5% done | ETA(epoch): 461s + [Epoch 457][30/40] loss=0.759054 avg=0.861054 VRAM=38.9GiB | 91.5% done | ETA(epoch): 230s + [Epoch 457][40/40] loss=0.904097 avg=0.868296 VRAM=38.8GiB | 91.6% done | ETA(epoch): 0s + Train loss: 0.868296 (921.8s) ETA: 702min + Val loss: 0.827851 [t_0.0-0.2=1.0643 t_0.2-0.4=0.9960 t_0.4-0.6=0.8004 t_0.6-0.8=0.7040 t_0.8-1.0=0.6212] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0457 + Deleted old checkpoint: checkpoint_epoch_0454 +[MEM @ epoch 457 end] RAM: 22.5/188.4 GiB (12.0%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 458/499 (92% done) --- + [Epoch 458][10/40] loss=0.760732 avg=0.853352 VRAM=38.9GiB | 91.6% done | ETA(epoch): 691s + [Epoch 458][20/40] loss=0.869187 avg=0.863883 VRAM=38.8GiB | 91.7% done | ETA(epoch): 461s + [Epoch 458][30/40] loss=0.792840 avg=0.864200 VRAM=38.9GiB | 91.8% done | ETA(epoch): 230s + [Epoch 458][40/40] loss=0.931649 avg=0.868209 VRAM=38.8GiB | 91.8% done | ETA(epoch): 0s + Train loss: 0.868209 (921.4s) ETA: 685min + Val loss: 0.855048 [t_0.0-0.2=1.0724 t_0.2-0.4=1.0099 t_0.4-0.6=0.8356 t_0.6-0.8=0.6923 t_0.8-1.0=0.6345] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0458 + Deleted old checkpoint: checkpoint_epoch_0455 +[MEM @ epoch 458 end] RAM: 22.5/188.4 GiB (12.0%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 459/499 (92% done) --- + [Epoch 459][10/40] loss=0.888987 avg=0.844074 VRAM=38.9GiB | 91.8% done | ETA(epoch): 690s + [Epoch 459][20/40] loss=0.882641 avg=0.843056 VRAM=38.8GiB | 91.9% done | ETA(epoch): 460s + [Epoch 459][30/40] loss=0.880209 avg=0.851116 VRAM=38.9GiB | 92.0% done | ETA(epoch): 230s + [Epoch 459][40/40] loss=0.734281 avg=0.842367 VRAM=38.8GiB | 92.0% done | ETA(epoch): 0s + Train loss: 0.842367 (921.1s) ETA: 669min + Val loss: 0.831405 [t_0.0-0.2=1.0641 t_0.2-0.4=0.9600 t_0.4-0.6=0.8629 t_0.6-0.8=0.7137 t_0.8-1.0=0.6245] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0459 + Deleted old checkpoint: checkpoint_epoch_0456 +[MEM @ epoch 459 end] RAM: 22.6/188.4 GiB (12.0%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 460/499 (92% done) --- + [Epoch 460][10/40] loss=0.828661 avg=0.882557 VRAM=38.9GiB | 92.0% done | ETA(epoch): 690s + [Epoch 460][20/40] loss=0.942092 avg=0.881960 VRAM=38.8GiB | 92.1% done | ETA(epoch): 460s + [Epoch 460][30/40] loss=0.926227 avg=0.870795 VRAM=38.9GiB | 92.2% done | ETA(epoch): 230s + [Epoch 460][40/40] loss=0.803527 avg=0.865495 VRAM=38.8GiB | 92.2% done | ETA(epoch): 0s + Train loss: 0.865495 (920.9s) ETA: 652min + Val loss: 0.855362 [t_0.0-0.2=1.0449 t_0.2-0.4=1.0069 t_0.4-0.6=0.8742 t_0.6-0.8=0.7030 t_0.8-1.0=0.6339] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0460 + Deleted old checkpoint: checkpoint_epoch_0457 +[MEM @ epoch 460 end] RAM: 22.5/188.4 GiB (12.0%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 461/499 (92% done) --- + [Epoch 461][10/40] loss=0.870610 avg=0.853941 VRAM=38.9GiB | 92.2% done | ETA(epoch): 690s + [Epoch 461][20/40] loss=0.889086 avg=0.862164 VRAM=38.8GiB | 92.3% done | ETA(epoch): 460s + [Epoch 461][30/40] loss=0.867115 avg=0.862036 VRAM=38.9GiB | 92.3% done | ETA(epoch): 230s + [Epoch 461][40/40] loss=0.764872 avg=0.854439 VRAM=38.8GiB | 92.4% done | ETA(epoch): 0s + Train loss: 0.854439 (920.8s) ETA: 635min + Val loss: 0.814125 [t_0.0-0.2=1.0681 t_0.2-0.4=0.9784 t_0.4-0.6=0.8049 t_0.6-0.8=0.6952 t_0.8-1.0=0.6175] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0461 + Deleted old checkpoint: checkpoint_epoch_0458 +[MEM @ epoch 461 end] RAM: 22.7/188.4 GiB (12.0%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 462/499 (92% done) --- + [Epoch 462][10/40] loss=0.788662 avg=0.830082 VRAM=38.9GiB | 92.5% done | ETA(epoch): 691s + [Epoch 462][20/40] loss=0.919089 avg=0.839580 VRAM=38.8GiB | 92.5% done | ETA(epoch): 461s + [Epoch 462][30/40] loss=0.938359 avg=0.843548 VRAM=38.9GiB | 92.5% done | ETA(epoch): 230s + [Epoch 462][40/40] loss=0.954794 avg=0.843986 VRAM=38.8GiB | 92.6% done | ETA(epoch): 0s + Train loss: 0.843986 (921.5s) ETA: 618min + Val loss: 0.837968 [t_0.0-0.2=1.0680 t_0.2-0.4=0.9928 t_0.4-0.6=0.8324 t_0.6-0.8=0.7223 t_0.8-1.0=0.6150] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0462 + Deleted old checkpoint: checkpoint_epoch_0459 +[MEM @ epoch 462 end] RAM: 22.7/188.4 GiB (12.0%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 463/499 (93% done) --- + [Epoch 463][10/40] loss=0.863720 avg=0.824349 VRAM=38.9GiB | 92.7% done | ETA(epoch): 691s + [Epoch 463][20/40] loss=0.873657 avg=0.852151 VRAM=38.8GiB | 92.7% done | ETA(epoch): 460s + [Epoch 463][30/40] loss=0.781629 avg=0.849867 VRAM=38.9GiB | 92.8% done | ETA(epoch): 230s + [Epoch 463][40/40] loss=0.886540 avg=0.849320 VRAM=38.8GiB | 92.8% done | ETA(epoch): 0s + Train loss: 0.849320 (920.6s) ETA: 602min + Val loss: 0.839973 [t_0.0-0.2=1.0616 t_0.2-0.4=1.0036 t_0.4-0.6=0.8463 t_0.6-0.8=0.6945 t_0.8-1.0=0.6239] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0463 + Deleted old checkpoint: checkpoint_epoch_0460 +[MEM @ epoch 463 end] RAM: 22.6/188.4 GiB (12.0%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 464/499 (93% done) --- + [Epoch 464][10/40] loss=0.900177 avg=0.836617 VRAM=38.9GiB | 92.8% done | ETA(epoch): 690s + [Epoch 464][20/40] loss=0.800262 avg=0.862978 VRAM=38.8GiB | 92.9% done | ETA(epoch): 460s + [Epoch 464][30/40] loss=0.845161 avg=0.862921 VRAM=38.9GiB | 93.0% done | ETA(epoch): 230s + [Epoch 464][40/40] loss=0.862733 avg=0.862147 VRAM=38.8GiB | 93.0% done | ETA(epoch): 0s + Train loss: 0.862147 (919.9s) ETA: 585min + Val loss: 0.849441 [t_0.0-0.2=1.0642 t_0.2-0.4=1.0067 t_0.4-0.6=0.8353 t_0.6-0.8=0.7118 t_0.8-1.0=0.6409] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0464 + Deleted old checkpoint: checkpoint_epoch_0461 +[MEM @ epoch 464 end] RAM: 22.6/188.4 GiB (12.0%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 465/499 (93% done) --- + [Epoch 465][10/40] loss=0.889123 avg=0.871650 VRAM=38.9GiB | 93.0% done | ETA(epoch): 690s + [Epoch 465][20/40] loss=0.823589 avg=0.855053 VRAM=38.8GiB | 93.1% done | ETA(epoch): 460s + [Epoch 465][30/40] loss=0.868336 avg=0.838503 VRAM=38.9GiB | 93.2% done | ETA(epoch): 230s + [Epoch 465][40/40] loss=0.763417 avg=0.838273 VRAM=38.8GiB | 93.2% done | ETA(epoch): 0s + Train loss: 0.838273 (919.7s) ETA: 568min + Val loss: 0.859979 [t_0.0-0.2=1.0680 t_0.2-0.4=0.9916 t_0.4-0.6=0.8190 t_0.6-0.8=0.7055 t_0.8-1.0=0.6395] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0465 + Deleted old checkpoint: checkpoint_epoch_0462 +[MEM @ epoch 465 end] RAM: 22.6/188.4 GiB (12.0%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 466/499 (93% done) --- + [Epoch 466][10/40] loss=0.830329 avg=0.828426 VRAM=38.9GiB | 93.2% done | ETA(epoch): 691s + [Epoch 466][20/40] loss=0.926772 avg=0.848752 VRAM=38.8GiB | 93.3% done | ETA(epoch): 460s + [Epoch 466][30/40] loss=0.896945 avg=0.847192 VRAM=38.9GiB | 93.3% done | ETA(epoch): 230s + [Epoch 466][40/40] loss=0.937065 avg=0.846423 VRAM=38.8GiB | 93.4% done | ETA(epoch): 0s + Train loss: 0.846423 (920.7s) ETA: 551min + Val loss: 0.847473 [t_0.0-0.2=1.0701 t_0.2-0.4=1.0066 t_0.4-0.6=0.8138 t_0.6-0.8=0.7010 t_0.8-1.0=0.6432] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0466 + Deleted old checkpoint: checkpoint_epoch_0463 +[MEM @ epoch 466 end] RAM: 22.6/188.4 GiB (12.0%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 467/499 (93% done) --- + [Epoch 467][10/40] loss=0.864634 avg=0.864081 VRAM=38.9GiB | 93.5% done | ETA(epoch): 691s + [Epoch 467][20/40] loss=0.932649 avg=0.859581 VRAM=38.8GiB | 93.5% done | ETA(epoch): 461s + [Epoch 467][30/40] loss=0.874220 avg=0.840040 VRAM=38.9GiB | 93.5% done | ETA(epoch): 230s + [Epoch 467][40/40] loss=0.882268 avg=0.841896 VRAM=38.8GiB | 93.6% done | ETA(epoch): 0s + Train loss: 0.841896 (921.0s) ETA: 535min + Val loss: 0.844486 [t_0.0-0.2=1.0713 t_0.2-0.4=0.9880 t_0.4-0.6=0.8062 t_0.6-0.8=0.7088 t_0.8-1.0=0.6373] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0467 + Deleted old checkpoint: checkpoint_epoch_0464 +[MEM @ epoch 467 end] RAM: 22.7/188.4 GiB (12.0%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 468/499 (94% done) --- + [Epoch 468][10/40] loss=0.848998 avg=0.862766 VRAM=38.9GiB | 93.7% done | ETA(epoch): 690s + [Epoch 468][20/40] loss=0.953224 avg=0.856911 VRAM=38.8GiB | 93.7% done | ETA(epoch): 460s + [Epoch 468][30/40] loss=0.853777 avg=0.857415 VRAM=38.9GiB | 93.8% done | ETA(epoch): 230s + [Epoch 468][40/40] loss=0.818046 avg=0.850961 VRAM=38.8GiB | 93.8% done | ETA(epoch): 0s + Train loss: 0.850961 (920.6s) ETA: 518min + Val loss: 0.813016 [t_0.0-0.2=1.0783 t_0.2-0.4=0.9724 t_0.4-0.6=0.8330 t_0.6-0.8=0.7067 t_0.8-1.0=0.6355] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0468 + Deleted old checkpoint: checkpoint_epoch_0465 +[MEM @ epoch 468 end] RAM: 22.7/188.4 GiB (12.0%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 469/499 (94% done) --- + [Epoch 469][10/40] loss=0.890016 avg=0.842732 VRAM=38.9GiB | 93.8% done | ETA(epoch): 691s + [Epoch 469][20/40] loss=0.829321 avg=0.853592 VRAM=38.8GiB | 93.9% done | ETA(epoch): 460s + [Epoch 469][30/40] loss=0.759237 avg=0.841863 VRAM=38.9GiB | 94.0% done | ETA(epoch): 230s + [Epoch 469][40/40] loss=0.857070 avg=0.836777 VRAM=38.8GiB | 94.0% done | ETA(epoch): 0s + Train loss: 0.836777 (920.2s) ETA: 501min + Val loss: 0.847077 [t_0.0-0.2=1.0768 t_0.2-0.4=0.9927 t_0.4-0.6=0.8310 t_0.6-0.8=0.7019 t_0.8-1.0=0.6479] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0469 + Deleted old checkpoint: checkpoint_epoch_0466 +[MEM @ epoch 469 end] RAM: 22.6/188.4 GiB (12.0%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 470/499 (94% done) --- + [Epoch 470][10/40] loss=0.953051 avg=0.861253 VRAM=38.9GiB | 94.0% done | ETA(epoch): 690s + [Epoch 470][20/40] loss=0.938363 avg=0.861615 VRAM=38.8GiB | 94.1% done | ETA(epoch): 460s + [Epoch 470][30/40] loss=0.773660 avg=0.848016 VRAM=38.9GiB | 94.2% done | ETA(epoch): 230s + [Epoch 470][40/40] loss=0.871399 avg=0.856438 VRAM=38.8GiB | 94.2% done | ETA(epoch): 0s + Train loss: 0.856438 (921.0s) ETA: 485min + Val loss: 0.845678 [t_0.0-0.2=1.0534 t_0.2-0.4=0.9773 t_0.4-0.6=0.8615 t_0.6-0.8=0.7116 t_0.8-1.0=0.6203] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0470 + Deleted old checkpoint: checkpoint_epoch_0467 +[MEM @ epoch 470 end] RAM: 22.6/188.4 GiB (12.0%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 471/499 (94% done) --- + [Epoch 471][10/40] loss=0.810219 avg=0.858781 VRAM=38.9GiB | 94.2% done | ETA(epoch): 691s + [Epoch 471][20/40] loss=0.756560 avg=0.840130 VRAM=38.8GiB | 94.3% done | ETA(epoch): 461s + [Epoch 471][30/40] loss=0.821205 avg=0.839480 VRAM=38.9GiB | 94.3% done | ETA(epoch): 230s + [Epoch 471][40/40] loss=0.872504 avg=0.839253 VRAM=38.8GiB | 94.4% done | ETA(epoch): 0s + Train loss: 0.839253 (921.1s) ETA: 468min + Val loss: 0.883376 [t_0.0-0.2=1.0688 t_0.2-0.4=0.9944 t_0.4-0.6=0.8302 t_0.6-0.8=0.7164 t_0.8-1.0=0.6397] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0471 + Deleted old checkpoint: checkpoint_epoch_0468 +[MEM @ epoch 471 end] RAM: 22.6/188.4 GiB (12.0%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 472/499 (94% done) --- + [Epoch 472][10/40] loss=0.916140 avg=0.873909 VRAM=38.9GiB | 94.5% done | ETA(epoch): 690s + [Epoch 472][20/40] loss=0.795820 avg=0.861358 VRAM=38.8GiB | 94.5% done | ETA(epoch): 460s + [Epoch 472][30/40] loss=0.860587 avg=0.856529 VRAM=38.9GiB | 94.5% done | ETA(epoch): 230s + [Epoch 472][40/40] loss=0.720421 avg=0.849854 VRAM=38.8GiB | 94.6% done | ETA(epoch): 0s + Train loss: 0.849854 (920.9s) ETA: 451min + Val loss: 0.877924 [t_0.0-0.2=1.0684 t_0.2-0.4=0.9946 t_0.4-0.6=0.8483 t_0.6-0.8=0.7273 t_0.8-1.0=0.6254] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0472 + Deleted old checkpoint: checkpoint_epoch_0469 +[MEM @ epoch 472 end] RAM: 22.6/188.4 GiB (12.0%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 473/499 (95% done) --- + [Epoch 473][10/40] loss=0.954936 avg=0.816660 VRAM=38.9GiB | 94.7% done | ETA(epoch): 690s + [Epoch 473][20/40] loss=0.837394 avg=0.844300 VRAM=38.8GiB | 94.7% done | ETA(epoch): 460s + [Epoch 473][30/40] loss=0.828713 avg=0.840879 VRAM=38.9GiB | 94.8% done | ETA(epoch): 230s + [Epoch 473][40/40] loss=0.772013 avg=0.847822 VRAM=38.8GiB | 94.8% done | ETA(epoch): 0s + Train loss: 0.847822 (920.4s) ETA: 434min + Val loss: 0.878200 [t_0.0-0.2=1.0669 t_0.2-0.4=0.9724 t_0.4-0.6=0.8344 t_0.6-0.8=0.7118 t_0.8-1.0=0.6211] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0473 + Deleted old checkpoint: checkpoint_epoch_0470 +[MEM @ epoch 473 end] RAM: 22.6/188.4 GiB (12.0%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 474/499 (95% done) --- + [Epoch 474][10/40] loss=0.694143 avg=0.845368 VRAM=38.9GiB | 94.8% done | ETA(epoch): 690s + [Epoch 474][20/40] loss=0.770769 avg=0.842084 VRAM=38.8GiB | 94.9% done | ETA(epoch): 460s + [Epoch 474][30/40] loss=0.829818 avg=0.840905 VRAM=38.9GiB | 95.0% done | ETA(epoch): 230s + [Epoch 474][40/40] loss=0.946317 avg=0.844463 VRAM=38.8GiB | 95.0% done | ETA(epoch): 0s + Train loss: 0.844463 (920.7s) ETA: 418min + Val loss: 0.854689 [t_0.0-0.2=1.0891 t_0.2-0.4=0.9924 t_0.4-0.6=0.8510 t_0.6-0.8=0.7081 t_0.8-1.0=0.6271] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0474 + Deleted old checkpoint: checkpoint_epoch_0471 +[MEM @ epoch 474 end] RAM: 22.6/188.4 GiB (12.0%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 475/499 (95% done) --- + [Epoch 475][10/40] loss=0.729243 avg=0.825472 VRAM=38.9GiB | 95.0% done | ETA(epoch): 690s + [Epoch 475][20/40] loss=0.773236 avg=0.831530 VRAM=38.8GiB | 95.1% done | ETA(epoch): 460s + [Epoch 475][30/40] loss=0.793476 avg=0.835983 VRAM=38.9GiB | 95.2% done | ETA(epoch): 230s + [Epoch 475][40/40] loss=0.816996 avg=0.842160 VRAM=38.8GiB | 95.2% done | ETA(epoch): 0s + Train loss: 0.842160 (920.5s) ETA: 401min + Val loss: 0.825618 [t_0.0-0.2=1.0767 t_0.2-0.4=0.9746 t_0.4-0.6=0.8808 t_0.6-0.8=0.6828 t_0.8-1.0=0.6274] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0475 + Deleted old checkpoint: checkpoint_epoch_0472 +[MEM @ epoch 475 end] RAM: 22.7/188.4 GiB (12.0%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 476/499 (95% done) --- + [Epoch 476][10/40] loss=0.863193 avg=0.874322 VRAM=38.9GiB | 95.2% done | ETA(epoch): 691s + [Epoch 476][20/40] loss=0.984820 avg=0.881883 VRAM=38.8GiB | 95.3% done | ETA(epoch): 460s + [Epoch 476][30/40] loss=0.834544 avg=0.867539 VRAM=38.9GiB | 95.3% done | ETA(epoch): 230s + [Epoch 476][40/40] loss=0.841819 avg=0.857827 VRAM=38.8GiB | 95.4% done | ETA(epoch): 0s + Train loss: 0.857827 (920.8s) ETA: 384min + Val loss: 0.845291 [t_0.0-0.2=1.0467 t_0.2-0.4=0.9792 t_0.4-0.6=0.8273 t_0.6-0.8=0.7232 t_0.8-1.0=0.6342] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0476 + Deleted old checkpoint: checkpoint_epoch_0473 +[MEM @ epoch 476 end] RAM: 22.6/188.4 GiB (12.0%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 477/499 (95% done) --- + [Epoch 477][10/40] loss=0.841249 avg=0.861711 VRAM=38.9GiB | 95.5% done | ETA(epoch): 690s + [Epoch 477][20/40] loss=1.010337 avg=0.845254 VRAM=38.8GiB | 95.5% done | ETA(epoch): 460s + [Epoch 477][30/40] loss=0.851273 avg=0.838508 VRAM=38.9GiB | 95.5% done | ETA(epoch): 230s + [Epoch 477][40/40] loss=0.822219 avg=0.845233 VRAM=38.8GiB | 95.6% done | ETA(epoch): 0s + Train loss: 0.845233 (920.5s) ETA: 367min + Val loss: 0.799508 [t_0.0-0.2=1.0694 t_0.2-0.4=0.9866 t_0.4-0.6=0.8056 t_0.6-0.8=0.7189 t_0.8-1.0=0.6210] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0477 + Deleted old checkpoint: checkpoint_epoch_0474 +[MEM @ epoch 477 end] RAM: 22.7/188.4 GiB (12.1%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 478/499 (96% done) --- + [Epoch 478][10/40] loss=0.802555 avg=0.866017 VRAM=38.9GiB | 95.7% done | ETA(epoch): 691s + [Epoch 478][20/40] loss=0.765312 avg=0.836496 VRAM=38.8GiB | 95.7% done | ETA(epoch): 460s + [Epoch 478][30/40] loss=0.934607 avg=0.846999 VRAM=38.9GiB | 95.8% done | ETA(epoch): 230s + [Epoch 478][40/40] loss=0.793285 avg=0.848997 VRAM=38.8GiB | 95.8% done | ETA(epoch): 0s + Train loss: 0.848997 (920.9s) ETA: 351min + Val loss: 0.837315 [t_0.0-0.2=1.0734 t_0.2-0.4=0.9540 t_0.4-0.6=0.8380 t_0.6-0.8=0.7098 t_0.8-1.0=0.6339] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0478 + Deleted old checkpoint: checkpoint_epoch_0475 +[MEM @ epoch 478 end] RAM: 22.5/188.4 GiB (12.0%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 479/499 (96% done) --- + [Epoch 479][10/40] loss=0.811449 avg=0.840279 VRAM=38.9GiB | 95.9% done | ETA(epoch): 691s + [Epoch 479][20/40] loss=0.837891 avg=0.853693 VRAM=38.8GiB | 95.9% done | ETA(epoch): 461s + [Epoch 479][30/40] loss=0.861171 avg=0.860826 VRAM=38.9GiB | 96.0% done | ETA(epoch): 230s + [Epoch 479][40/40] loss=0.895314 avg=0.861284 VRAM=38.8GiB | 96.0% done | ETA(epoch): 0s + Train loss: 0.861284 (922.0s) ETA: 334min + Val loss: 0.836351 [t_0.0-0.2=1.0643 t_0.2-0.4=0.9923 t_0.4-0.6=0.8584 t_0.6-0.8=0.7265 t_0.8-1.0=0.6356] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0479 + Deleted old checkpoint: checkpoint_epoch_0476 +[MEM @ epoch 479 end] RAM: 22.7/188.4 GiB (12.1%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 480/499 (96% done) --- + [Epoch 480][10/40] loss=0.861773 avg=0.843042 VRAM=38.9GiB | 96.0% done | ETA(epoch): 691s + [Epoch 480][20/40] loss=0.742392 avg=0.842527 VRAM=38.8GiB | 96.1% done | ETA(epoch): 461s + [Epoch 480][30/40] loss=0.855300 avg=0.840116 VRAM=38.9GiB | 96.2% done | ETA(epoch): 230s + [Epoch 480][40/40] loss=0.890875 avg=0.837817 VRAM=38.8GiB | 96.2% done | ETA(epoch): 0s + Train loss: 0.837817 (921.8s) ETA: 317min + Val loss: 0.845799 [t_0.0-0.2=1.0754 t_0.2-0.4=0.9702 t_0.4-0.6=0.8549 t_0.6-0.8=0.7242 t_0.8-1.0=0.6279] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0480 + Deleted old checkpoint: checkpoint_epoch_0477 +[MEM @ epoch 480 end] RAM: 22.8/188.4 GiB (12.1%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 481/499 (96% done) --- + [Epoch 481][10/40] loss=0.828392 avg=0.862946 VRAM=38.9GiB | 96.2% done | ETA(epoch): 691s + [Epoch 481][20/40] loss=0.836637 avg=0.864505 VRAM=38.8GiB | 96.3% done | ETA(epoch): 461s + [Epoch 481][30/40] loss=0.907736 avg=0.854579 VRAM=38.9GiB | 96.4% done | ETA(epoch): 230s + [Epoch 481][40/40] loss=0.895893 avg=0.850539 VRAM=38.8GiB | 96.4% done | ETA(epoch): 0s + Train loss: 0.850539 (921.7s) ETA: 301min + Val loss: 0.851281 [t_0.0-0.2=1.0864 t_0.2-0.4=0.9668 t_0.4-0.6=0.8657 t_0.6-0.8=0.6925 t_0.8-1.0=0.6289] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0481 + Deleted old checkpoint: checkpoint_epoch_0478 +[MEM @ epoch 481 end] RAM: 22.8/188.4 GiB (12.1%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 482/499 (96% done) --- + [Epoch 482][10/40] loss=0.886051 avg=0.848919 VRAM=38.9GiB | 96.5% done | ETA(epoch): 691s + [Epoch 482][20/40] loss=0.712766 avg=0.840519 VRAM=38.8GiB | 96.5% done | ETA(epoch): 461s + [Epoch 482][30/40] loss=0.778191 avg=0.850156 VRAM=38.9GiB | 96.5% done | ETA(epoch): 230s + [Epoch 482][40/40] loss=0.791382 avg=0.846102 VRAM=38.8GiB | 96.6% done | ETA(epoch): 0s + Train loss: 0.846102 (921.9s) ETA: 284min + Val loss: 0.857074 [t_0.0-0.2=1.0706 t_0.2-0.4=0.9758 t_0.4-0.6=0.8247 t_0.6-0.8=0.7108 t_0.8-1.0=0.6228] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0482 + Deleted old checkpoint: checkpoint_epoch_0479 +[MEM @ epoch 482 end] RAM: 22.9/188.4 GiB (12.1%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 483/499 (97% done) --- + [Epoch 483][10/40] loss=0.896798 avg=0.830075 VRAM=38.9GiB | 96.7% done | ETA(epoch): 691s + [Epoch 483][20/40] loss=0.833269 avg=0.837345 VRAM=38.8GiB | 96.7% done | ETA(epoch): 461s + [Epoch 483][30/40] loss=0.877789 avg=0.843053 VRAM=38.9GiB | 96.8% done | ETA(epoch): 230s + [Epoch 483][40/40] loss=0.890464 avg=0.847116 VRAM=38.8GiB | 96.8% done | ETA(epoch): 0s + Train loss: 0.847116 (921.6s) ETA: 267min + Val loss: 0.825100 [t_0.0-0.2=1.0625 t_0.2-0.4=0.9765 t_0.4-0.6=0.8219 t_0.6-0.8=0.7185 t_0.8-1.0=0.6416] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0483 + Deleted old checkpoint: checkpoint_epoch_0480 +[MEM @ epoch 483 end] RAM: 23.0/188.4 GiB (12.2%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 484/499 (97% done) --- + [Epoch 484][10/40] loss=0.908959 avg=0.822622 VRAM=38.9GiB | 96.9% done | ETA(epoch): 691s + [Epoch 484][20/40] loss=0.733092 avg=0.848956 VRAM=38.8GiB | 96.9% done | ETA(epoch): 461s + [Epoch 484][30/40] loss=0.834623 avg=0.851218 VRAM=38.9GiB | 97.0% done | ETA(epoch): 230s + [Epoch 484][40/40] loss=0.938653 avg=0.852427 VRAM=38.8GiB | 97.0% done | ETA(epoch): 0s + Train loss: 0.852427 (921.4s) ETA: 251min + Val loss: 0.833276 [t_0.0-0.2=1.0613 t_0.2-0.4=1.0054 t_0.4-0.6=0.8580 t_0.6-0.8=0.7028 t_0.8-1.0=0.6402] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0484 + Deleted old checkpoint: checkpoint_epoch_0481 +[MEM @ epoch 484 end] RAM: 22.9/188.4 GiB (12.2%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 485/499 (97% done) --- + [Epoch 485][10/40] loss=0.914460 avg=0.828367 VRAM=38.9GiB | 97.0% done | ETA(epoch): 691s + [Epoch 485][20/40] loss=0.856862 avg=0.838105 VRAM=38.8GiB | 97.1% done | ETA(epoch): 461s + [Epoch 485][30/40] loss=0.837261 avg=0.842418 VRAM=38.9GiB | 97.2% done | ETA(epoch): 230s + [Epoch 485][40/40] loss=0.899244 avg=0.849765 VRAM=38.8GiB | 97.2% done | ETA(epoch): 0s + Train loss: 0.849765 (921.4s) ETA: 234min + Val loss: 0.870210 [t_0.0-0.2=1.0726 t_0.2-0.4=0.9792 t_0.4-0.6=0.8645 t_0.6-0.8=0.6972 t_0.8-1.0=0.6131] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0485 + Deleted old checkpoint: checkpoint_epoch_0482 +[MEM @ epoch 485 end] RAM: 22.8/188.4 GiB (12.1%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 486/499 (97% done) --- + [Epoch 486][10/40] loss=0.736155 avg=0.862795 VRAM=38.9GiB | 97.2% done | ETA(epoch): 691s + [Epoch 486][20/40] loss=0.767075 avg=0.850937 VRAM=38.8GiB | 97.3% done | ETA(epoch): 461s + [Epoch 486][30/40] loss=0.849009 avg=0.847828 VRAM=38.9GiB | 97.4% done | ETA(epoch): 230s + [Epoch 486][40/40] loss=0.924746 avg=0.846535 VRAM=38.8GiB | 97.4% done | ETA(epoch): 0s + Train loss: 0.846535 (921.4s) ETA: 217min + Val loss: 0.894127 [t_0.0-0.2=1.0707 t_0.2-0.4=1.0145 t_0.4-0.6=0.8657 t_0.6-0.8=0.6815 t_0.8-1.0=0.6269] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0486 + Deleted old checkpoint: checkpoint_epoch_0483 +[MEM @ epoch 486 end] RAM: 23.0/188.4 GiB (12.2%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 487/499 (97% done) --- + [Epoch 487][10/40] loss=0.859878 avg=0.850508 VRAM=38.9GiB | 97.5% done | ETA(epoch): 691s + [Epoch 487][20/40] loss=0.859774 avg=0.864223 VRAM=38.8GiB | 97.5% done | ETA(epoch): 461s + [Epoch 487][30/40] loss=0.906650 avg=0.859422 VRAM=38.9GiB | 97.5% done | ETA(epoch): 230s + [Epoch 487][40/40] loss=0.806927 avg=0.853971 VRAM=38.8GiB | 97.6% done | ETA(epoch): 0s + Train loss: 0.853971 (921.4s) ETA: 200min + Val loss: 0.868731 [t_0.0-0.2=1.0646 t_0.2-0.4=0.9567 t_0.4-0.6=0.8491 t_0.6-0.8=0.7063 t_0.8-1.0=0.6485] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0487 + Deleted old checkpoint: checkpoint_epoch_0484 +[MEM @ epoch 487 end] RAM: 22.9/188.4 GiB (12.2%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 488/499 (98% done) --- + [Epoch 488][10/40] loss=0.882149 avg=0.880385 VRAM=38.9GiB | 97.7% done | ETA(epoch): 691s + [Epoch 488][20/40] loss=0.781114 avg=0.848500 VRAM=38.8GiB | 97.7% done | ETA(epoch): 461s + [Epoch 488][30/40] loss=0.903352 avg=0.851801 VRAM=38.9GiB | 97.8% done | ETA(epoch): 230s + [Epoch 488][40/40] loss=0.815721 avg=0.844601 VRAM=38.8GiB | 97.8% done | ETA(epoch): 0s + Train loss: 0.844601 (921.9s) ETA: 184min + Val loss: 0.844116 [t_0.0-0.2=1.0693 t_0.2-0.4=1.0104 t_0.4-0.6=0.8869 t_0.6-0.8=0.6924 t_0.8-1.0=0.6300] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0488 + Deleted old checkpoint: checkpoint_epoch_0485 +[MEM @ epoch 488 end] RAM: 23.0/188.4 GiB (12.2%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 489/499 (98% done) --- + [Epoch 489][10/40] loss=0.760607 avg=0.838984 VRAM=38.9GiB | 97.9% done | ETA(epoch): 691s + [Epoch 489][20/40] loss=0.874713 avg=0.852606 VRAM=38.8GiB | 97.9% done | ETA(epoch): 461s + [Epoch 489][30/40] loss=0.735052 avg=0.844848 VRAM=38.9GiB | 98.0% done | ETA(epoch): 230s + [Epoch 489][40/40] loss=0.791644 avg=0.844726 VRAM=38.8GiB | 98.0% done | ETA(epoch): 0s + Train loss: 0.844726 (921.5s) ETA: 167min + Val loss: 0.831479 [t_0.0-0.2=1.0592 t_0.2-0.4=0.9773 t_0.4-0.6=0.8535 t_0.6-0.8=0.7189 t_0.8-1.0=0.6234] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0489 + Deleted old checkpoint: checkpoint_epoch_0486 +[MEM @ epoch 489 end] RAM: 22.8/188.4 GiB (12.1%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 490/499 (98% done) --- + [Epoch 490][10/40] loss=0.741748 avg=0.850724 VRAM=38.9GiB | 98.0% done | ETA(epoch): 691s + [Epoch 490][20/40] loss=0.877529 avg=0.855076 VRAM=38.8GiB | 98.1% done | ETA(epoch): 461s + [Epoch 490][30/40] loss=0.750581 avg=0.841383 VRAM=38.9GiB | 98.2% done | ETA(epoch): 230s + [Epoch 490][40/40] loss=0.828995 avg=0.850236 VRAM=38.8GiB | 98.2% done | ETA(epoch): 0s + Train loss: 0.850236 (921.2s) ETA: 150min + Val loss: 0.837923 [t_0.0-0.2=1.0735 t_0.2-0.4=0.9810 t_0.4-0.6=0.8503 t_0.6-0.8=0.7180 t_0.8-1.0=0.6276] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0490 + Deleted old checkpoint: checkpoint_epoch_0487 +[MEM @ epoch 490 end] RAM: 22.9/188.4 GiB (12.2%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 491/499 (98% done) --- + [Epoch 491][10/40] loss=0.903957 avg=0.831276 VRAM=38.9GiB | 98.2% done | ETA(epoch): 691s + [Epoch 491][20/40] loss=0.895899 avg=0.849838 VRAM=38.8GiB | 98.3% done | ETA(epoch): 461s + [Epoch 491][30/40] loss=0.929917 avg=0.849212 VRAM=38.9GiB | 98.4% done | ETA(epoch): 230s + [Epoch 491][40/40] loss=0.863417 avg=0.849458 VRAM=38.8GiB | 98.4% done | ETA(epoch): 0s + Train loss: 0.849458 (921.6s) ETA: 134min + Val loss: 0.857913 [t_0.0-0.2=1.0836 t_0.2-0.4=1.0002 t_0.4-0.6=0.8475 t_0.6-0.8=0.6906 t_0.8-1.0=0.6345] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0491 + Deleted old checkpoint: checkpoint_epoch_0488 +[MEM @ epoch 491 end] RAM: 22.9/188.4 GiB (12.2%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 492/499 (98% done) --- + [Epoch 492][10/40] loss=0.882441 avg=0.837307 VRAM=38.9GiB | 98.5% done | ETA(epoch): 691s + [Epoch 492][20/40] loss=1.001307 avg=0.847141 VRAM=38.8GiB | 98.5% done | ETA(epoch): 461s + [Epoch 492][30/40] loss=0.951623 avg=0.847079 VRAM=38.9GiB | 98.6% done | ETA(epoch): 230s + [Epoch 492][40/40] loss=0.935815 avg=0.857900 VRAM=38.8GiB | 98.6% done | ETA(epoch): 0s + Train loss: 0.857900 (921.5s) ETA: 117min + Val loss: 0.836289 [t_0.0-0.2=1.0458 t_0.2-0.4=0.9652 t_0.4-0.6=0.8866 t_0.6-0.8=0.7172 t_0.8-1.0=0.5991] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0492 + Deleted old checkpoint: checkpoint_epoch_0489 +[MEM @ epoch 492 end] RAM: 22.9/188.4 GiB (12.2%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 493/499 (99% done) --- + [Epoch 493][10/40] loss=0.823141 avg=0.877079 VRAM=38.9GiB | 98.7% done | ETA(epoch): 691s + [Epoch 493][20/40] loss=0.883497 avg=0.864430 VRAM=38.8GiB | 98.7% done | ETA(epoch): 461s + [Epoch 493][30/40] loss=0.828061 avg=0.864030 VRAM=38.9GiB | 98.8% done | ETA(epoch): 230s + [Epoch 493][40/40] loss=0.937072 avg=0.864609 VRAM=38.8GiB | 98.8% done | ETA(epoch): 0s + Train loss: 0.864609 (921.5s) ETA: 100min + Val loss: 0.877635 [t_0.0-0.2=1.0671 t_0.2-0.4=0.9924 t_0.4-0.6=0.8254 t_0.6-0.8=0.7239 t_0.8-1.0=0.6414] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0493 + Deleted old checkpoint: checkpoint_epoch_0490 +[MEM @ epoch 493 end] RAM: 22.9/188.4 GiB (12.1%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 494/499 (99% done) --- + [Epoch 494][10/40] loss=0.939976 avg=0.850419 VRAM=38.9GiB | 98.9% done | ETA(epoch): 690s + [Epoch 494][20/40] loss=0.900787 avg=0.856948 VRAM=38.8GiB | 98.9% done | ETA(epoch): 460s + [Epoch 494][30/40] loss=0.812012 avg=0.845209 VRAM=38.9GiB | 99.0% done | ETA(epoch): 230s + [Epoch 494][40/40] loss=0.789132 avg=0.846425 VRAM=38.8GiB | 99.0% done | ETA(epoch): 0s + Train loss: 0.846425 (921.1s) ETA: 83min + Val loss: 0.811759 [t_0.0-0.2=1.0732 t_0.2-0.4=0.9970 t_0.4-0.6=0.8595 t_0.6-0.8=0.6895 t_0.8-1.0=0.6396] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0494 + Deleted old checkpoint: checkpoint_epoch_0491 +[MEM @ epoch 494 end] RAM: 23.0/188.4 GiB (12.2%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 495/499 (99% done) --- + [Epoch 495][10/40] loss=0.836484 avg=0.836095 VRAM=38.9GiB | 99.1% done | ETA(epoch): 691s + [Epoch 495][20/40] loss=0.785083 avg=0.836438 VRAM=38.8GiB | 99.1% done | ETA(epoch): 460s + [Epoch 495][30/40] loss=0.817367 avg=0.840817 VRAM=38.9GiB | 99.2% done | ETA(epoch): 230s + [Epoch 495][40/40] loss=0.910720 avg=0.840945 VRAM=38.8GiB | 99.2% done | ETA(epoch): 0s + Train loss: 0.840945 (920.7s) ETA: 67min + Val loss: 0.834984 [t_0.0-0.2=1.0866 t_0.2-0.4=0.9939 t_0.4-0.6=0.8633 t_0.6-0.8=0.7002 t_0.8-1.0=0.6382] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0495 + Deleted old checkpoint: checkpoint_epoch_0492 +[MEM @ epoch 495 end] RAM: 23.1/188.4 GiB (12.3%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 496/499 (99% done) --- + [Epoch 496][10/40] loss=0.943249 avg=0.851833 VRAM=38.9GiB | 99.2% done | ETA(epoch): 691s + [Epoch 496][20/40] loss=0.832430 avg=0.863925 VRAM=38.8GiB | 99.3% done | ETA(epoch): 460s + [Epoch 496][30/40] loss=0.914778 avg=0.856200 VRAM=38.9GiB | 99.4% done | ETA(epoch): 230s + [Epoch 496][40/40] loss=0.802648 avg=0.857578 VRAM=38.8GiB | 99.4% done | ETA(epoch): 0s + Train loss: 0.857578 (921.1s) ETA: 50min + Val loss: 0.855112 [t_0.0-0.2=1.0716 t_0.2-0.4=1.0263 t_0.4-0.6=0.8066 t_0.6-0.8=0.7188 t_0.8-1.0=0.6433] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0496 + Deleted old checkpoint: checkpoint_epoch_0493 +[MEM @ epoch 496 end] RAM: 23.0/188.4 GiB (12.2%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 497/499 (99% done) --- + [Epoch 497][10/40] loss=0.725604 avg=0.837161 VRAM=38.9GiB | 99.5% done | ETA(epoch): 690s + [Epoch 497][20/40] loss=0.784466 avg=0.838051 VRAM=38.8GiB | 99.5% done | ETA(epoch): 460s + [Epoch 497][30/40] loss=0.876682 avg=0.843165 VRAM=38.9GiB | 99.6% done | ETA(epoch): 230s + [Epoch 497][40/40] loss=0.771847 avg=0.841516 VRAM=38.8GiB | 99.6% done | ETA(epoch): 0s + Train loss: 0.841516 (920.9s) ETA: 33min + Val loss: 0.859866 [t_0.0-0.2=1.0705 t_0.2-0.4=0.9881 t_0.4-0.6=0.8443 t_0.6-0.8=0.7298 t_0.8-1.0=0.6297] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0497 + Deleted old checkpoint: checkpoint_epoch_0494 +[MEM @ epoch 497 end] RAM: 22.9/188.4 GiB (12.2%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 498/499 (100% done) --- + [Epoch 498][10/40] loss=0.802054 avg=0.848383 VRAM=38.9GiB | 99.7% done | ETA(epoch): 690s + [Epoch 498][20/40] loss=0.919648 avg=0.868847 VRAM=38.8GiB | 99.7% done | ETA(epoch): 460s + [Epoch 498][30/40] loss=0.770076 avg=0.854424 VRAM=38.9GiB | 99.8% done | ETA(epoch): 230s + [Epoch 498][40/40] loss=0.877712 avg=0.847612 VRAM=38.8GiB | 99.8% done | ETA(epoch): 0s + Train loss: 0.847612 (921.0s) ETA: 17min + Val loss: 0.859729 [t_0.0-0.2=1.0734 t_0.2-0.4=0.9969 t_0.4-0.6=0.8401 t_0.6-0.8=0.7192 t_0.8-1.0=0.6313] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0498 + Deleted old checkpoint: checkpoint_epoch_0495 +[MEM @ epoch 498 end] RAM: 23.0/188.4 GiB (12.2%) | VRAM: 38.2/139.8 GiB (27.3%) + +--- Epoch 499/499 (100% done) --- + [Epoch 499][10/40] loss=0.878773 avg=0.865926 VRAM=38.9GiB | 99.9% done | ETA(epoch): 691s + [Epoch 499][20/40] loss=0.884549 avg=0.853264 VRAM=38.8GiB | 99.9% done | ETA(epoch): 461s + [Epoch 499][30/40] loss=0.822359 avg=0.852042 VRAM=38.9GiB | 100.0% done | ETA(epoch): 230s + [Epoch 499][40/40] loss=0.895049 avg=0.849662 VRAM=38.8GiB | 100.0% done | ETA(epoch): 0s + [MilestoneVis] train_0 step 19999 ✓ + [MilestoneGrid] train_0 → output/lora_rank_128_mlp_H200/milestone_vis/milestone_grid_train_0.png + [MilestoneVis] train_1 step 19999 ✓ + [MilestoneGrid] train_1 → output/lora_rank_128_mlp_H200/milestone_vis/milestone_grid_train_1.png + [MilestoneVis] val_0 step 19999 ✓ + [MilestoneGrid] val_0 → output/lora_rank_128_mlp_H200/milestone_vis/milestone_grid_val_0.png + [MilestoneVis] val_1 step 19999 ✓ + [MilestoneGrid] val_1 → output/lora_rank_128_mlp_H200/milestone_vis/milestone_grid_val_1.png + [MilestoneVis] test_0 step 19999 ✓ + [MilestoneGrid] test_0 → output/lora_rank_128_mlp_H200/milestone_vis/milestone_grid_test_0.png + [MilestoneVis] test_1 step 19999 ✓ + [MilestoneGrid] test_1 → output/lora_rank_128_mlp_H200/milestone_vis/milestone_grid_test_1.png + [MilestoneVis] test_2 step 19999 ✓ + [MilestoneGrid] test_2 → output/lora_rank_128_mlp_H200/milestone_vis/milestone_grid_test_2.png + [MilestoneVis] test_3 step 19999 ✓ + [MilestoneGrid] test_3 → output/lora_rank_128_mlp_H200/milestone_vis/milestone_grid_test_3.png + [MilestoneVis] test_4 step 19999 ✓ + [MilestoneGrid] test_4 → output/lora_rank_128_mlp_H200/milestone_vis/milestone_grid_test_4.png + [MilestoneVis] test_5 step 19999 ✓ + [MilestoneGrid] test_5 → output/lora_rank_128_mlp_H200/milestone_vis/milestone_grid_test_5.png + Train loss: 0.849662 (1681.1s) ETA: 0min + Val loss: 0.852974 [t_0.0-0.2=1.0568 t_0.2-0.4=0.9977 t_0.4-0.6=0.8552 t_0.6-0.8=0.6934 t_0.8-1.0=0.6339] + Checkpoint saved: output/lora_rank_128_mlp_H200/checkpoint_epoch_0499 + Deleted old checkpoint: checkpoint_epoch_0496 +[MEM @ epoch 499 end] RAM: 23.5/188.4 GiB (12.5%) | VRAM: 38.2/139.8 GiB (27.3%) + [LossCurve WARN] could not save loss curve: No module named 'matplotlib' + +====================================================================== +Training complete. +====================================================================== +wandb: updating run metadata (0.0s) + m wandb: updating run metadata (0.0s) + m wandb: updating run metadata (0.0s) + m wandb: updating run metadata (0.0s) + m wandb: updating run metadata (0.5s) + m wandb: updating run metadata (0.5s) + m wandb: updating run metadata (0.5s) + m wandb: updating run metadata (0.5s) + m wandb: updating run metadata (0.5s) + m wandb: uploading wandb-summary.json 3.1KB/3.1KB (0.2s) +wandb: uploading config.yaml 3.3KB/3.3KB (0.2s) + m m wandb: uploading wandb-summary.json 3.1KB/3.1KB (0.2s) +wandb: uploading config.yaml 3.3KB/3.3KB (0.2s) + m m wandb: uploading wandb-summary.json 3.1KB/3.1KB (0.2s) +wandb: uploading config.yaml 3.3KB/3.3KB (0.2s) + m m wandb: uploading wandb-summary.json 3.1KB/3.1KB (0.2s) +wandb: uploading config.yaml 3.3KB/3.3KB (0.2s) + m m wandb: uploading wandb-summary.json 3.1KB/3.1KB (0.2s) +wandb: uploading config.yaml 3.3KB/3.3KB (0.2s) + m m wandb: uploading history steps 3109-3109, summary (0.3s) + m wandb: uploading history steps 3109-3109, summary (0.3s) + m wandb: uploading history steps 3109-3109, summary (0.3s) + m wandb: uploading history steps 3109-3109, summary (0.3s) + m wandb: uploading history steps 3109-3109, summary (0.3s) + m wandb: uploading history steps 3109-3109, summary (0.8s) + m wandb: uploading history steps 3109-3109, summary (0.8s) + m wandb: uploading history steps 3109-3109, summary (0.8s) + m wandb: +wandb: Run summary: +wandb: adapter_lr 0.0 +wandb: best_val_loss 0.79881 +wandb: cpu_pct 0.6 +wandb: epoch 499 +wandb: final_train_loss 0.84966 +wandb: global_step 19999 +wandb: gpu_mem_gib 38.23181 +wandb: step_loss 0.89505 +wandb: total_epochs 500 +wandb: train_loss 0.84966 +wandb: +6 ... +wandb: +wandb: View run lora_rank_128_mlp_H200-train-20260416-164244 at: https://wandb.ai/hku-xg-boost/lora_rank_128_mlp_H200/runs/gv4s3qq7 +wandb: View project at: https://wandb.ai/hku-xg-boost/lora_rank_128_mlp_H200 +wandb: Synced 4 W&B file(s), 110 media file(s), 0 artifact file(s) and 0 other file(s) +wandb: Find logs at: ./wandb/run-20260416_164244-gv4s3qq7/logs