Add scripts and checkpoints (CosFly-Track release)
#72
by Ys404 - opened
This view is limited to 50 files because the change set is too large; see the raw diff for the full list.
- .gitattributes +8 -0
- .watchdog.log +281 -0
- checkpoints/GLM-4.6V-Flash-SFT/all_results.json +8 -0
- checkpoints/GLM-4.6V-Flash-SFT/chat_template.jinja +140 -0
- checkpoints/GLM-4.6V-Flash-SFT/config.json +72 -0
- checkpoints/GLM-4.6V-Flash-SFT/eval_results_job_glm_glm_46v_flash_20260430_010119.json +56 -0
- checkpoints/GLM-4.6V-Flash-SFT/generation_config.json +16 -0
- checkpoints/GLM-4.6V-Flash-SFT/model.safetensors +3 -0
- checkpoints/GLM-4.6V-Flash-SFT/processor_config.json +63 -0
- checkpoints/GLM-4.6V-Flash-SFT/tokenizer.json +3 -0
- checkpoints/GLM-4.6V-Flash-SFT/tokenizer_config.json +19 -0
- checkpoints/GLM-4.6V-Flash-SFT/train_results.json +8 -0
- checkpoints/GLM-4.6V-Flash-SFT/trainer_state.json +2227 -0
- checkpoints/GLM-4.6V-Flash-SFT/training_loss.png +0 -0
- checkpoints/Gemma-4-E4B-it-SFT/all_results.json +8 -0
- checkpoints/Gemma-4-E4B-it-SFT/chat_template.jinja +263 -0
- checkpoints/Gemma-4-E4B-it-SFT/config.json +199 -0
- checkpoints/Gemma-4-E4B-it-SFT/eval_results_job_gemma_gemma_4_e4b_20260430_011024.json +56 -0
- checkpoints/Gemma-4-E4B-it-SFT/generation_config.json +15 -0
- checkpoints/Gemma-4-E4B-it-SFT/model.safetensors +3 -0
- checkpoints/Gemma-4-E4B-it-SFT/processor_config.json +75 -0
- checkpoints/Gemma-4-E4B-it-SFT/tokenizer.json +3 -0
- checkpoints/Gemma-4-E4B-it-SFT/tokenizer_config.json +96 -0
- checkpoints/Gemma-4-E4B-it-SFT/train_results.json +8 -0
- checkpoints/Gemma-4-E4B-it-SFT/trainer_state.json +2227 -0
- checkpoints/Gemma-4-E4B-it-SFT/training_loss.png +0 -0
- checkpoints/InternVL3.5-8B-SFT/all_results.json +8 -0
- checkpoints/InternVL3.5-8B-SFT/chat_template.jinja +6 -0
- checkpoints/InternVL3.5-8B-SFT/config.json +121 -0
- checkpoints/InternVL3.5-8B-SFT/eval_results_job_internvl35_8b_internvl35_8b_20260430_002347.json +55 -0
- checkpoints/InternVL3.5-8B-SFT/generation_config.json +8 -0
- checkpoints/InternVL3.5-8B-SFT/model.safetensors +3 -0
- checkpoints/InternVL3.5-8B-SFT/processor_config.json +79 -0
- checkpoints/InternVL3.5-8B-SFT/tokenizer.json +3 -0
- checkpoints/InternVL3.5-8B-SFT/tokenizer_config.json +29 -0
- checkpoints/InternVL3.5-8B-SFT/train_results.json +8 -0
- checkpoints/InternVL3.5-8B-SFT/trainer_state.json +2227 -0
- checkpoints/InternVL3.5-8B-SFT/training_loss.png +0 -0
- checkpoints/Qwen3-VL-2B-SFT/all_results.json +8 -0
- checkpoints/Qwen3-VL-2B-SFT/chat_template.jinja +120 -0
- checkpoints/Qwen3-VL-2B-SFT/config.json +71 -0
- checkpoints/Qwen3-VL-2B-SFT/eval_results_job_qwen3vl_2b_qwen3_vl_2b_20260430_002232.json +56 -0
- checkpoints/Qwen3-VL-2B-SFT/generation_config.json +14 -0
- checkpoints/Qwen3-VL-2B-SFT/model.safetensors +3 -0
- checkpoints/Qwen3-VL-2B-SFT/processor_config.json +60 -0
- checkpoints/Qwen3-VL-2B-SFT/tokenizer.json +3 -0
- checkpoints/Qwen3-VL-2B-SFT/tokenizer_config.json +31 -0
- checkpoints/Qwen3-VL-2B-SFT/train_results.json +8 -0
- checkpoints/Qwen3-VL-2B-SFT/trainer_state.json +2227 -0
- checkpoints/Qwen3-VL-2B-SFT/training_loss.png +0 -0
.gitattributes
CHANGED
@@ -33,3 +33,11 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+checkpoints/Gemma-4-E4B-it-SFT/tokenizer.json filter=lfs diff=lfs merge=lfs -text
+checkpoints/GLM-4.6V-Flash-SFT/tokenizer.json filter=lfs diff=lfs merge=lfs -text
+checkpoints/InternVL3.5-8B-SFT/tokenizer.json filter=lfs diff=lfs merge=lfs -text
+checkpoints/Qwen3-VL-2B-SFT/tokenizer.json filter=lfs diff=lfs merge=lfs -text
+checkpoints/Qwen3-VL-8B-SFT/tokenizer.json filter=lfs diff=lfs merge=lfs -text
+checkpoints/Qwen3.5-0.8B-SFT/tokenizer.json filter=lfs diff=lfs merge=lfs -text
+checkpoints/Qwen3.5-2B-SFT/tokenizer.json filter=lfs diff=lfs merge=lfs -text
+checkpoints/Qwen3.5-9B-SFT/tokenizer.json filter=lfs diff=lfs merge=lfs -text
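These rules route each checkpoint's `tokenizer.json` through Git LFS so only a pointer lives in the repo. As a minimal sketch (not part of this PR), the same attribute lines can be regenerated for every tokenizer under `checkpoints/`, mirroring what `git lfs track` records:

```python
# Minimal sketch (hypothetical helper, not part of this PR): print the
# .gitattributes lines added above for every tokenizer.json under checkpoints/.
from pathlib import Path

ATTR = "filter=lfs diff=lfs merge=lfs -text"

for p in sorted(Path("checkpoints").glob("*/tokenizer.json")):
    print(f"{p.as_posix()} {ATTR}")
```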
.watchdog.log
ADDED
@@ -0,0 +1,281 @@
+[2026-05-07 01:23:57] Watchdog started, stall threshold = 300 s
+[2026-05-07 01:23:58] No upload python found. Restarting...
+[2026-05-07 01:23:58] Restarting upload (LFS dedup will skip already uploaded chunks)...
+[2026-05-07 01:23:58] Restart issued, WMI ReturnValue=0, launcher PID=20160
+[2026-05-07 01:24:18] Tracking PID 22168, init Read=12.72 GB
+[2026-05-07 03:23:40] No upload python found. Restarting...
+[2026-05-07 03:23:40] Restarting upload (LFS dedup will skip already uploaded chunks)...
+[2026-05-07 03:23:40] Restart issued, WMI ReturnValue=0, launcher PID=27392
+[2026-05-07 03:24:01] Tracking PID 4136, init Read=9.47 GB
+[2026-05-07 03:27:03] No upload python found. Restarting...
+[2026-05-07 03:27:03] Restarting upload (LFS dedup will skip already uploaded chunks)...
+[2026-05-07 03:27:03] Restart issued, WMI ReturnValue=0, launcher PID=20248
+[2026-05-07 03:27:24] Tracking PID 13668, init Read=10.66 GB
+[2026-05-07 03:30:26] No upload python found. Restarting...
+[2026-05-07 03:30:26] Restarting upload (LFS dedup will skip already uploaded chunks)...
+[2026-05-07 03:30:26] Restart issued, WMI ReturnValue=0, launcher PID=14656
+[2026-05-07 03:30:46] Tracking PID 11616, init Read=9.98 GB
+[2026-05-07 03:33:48] No upload python found. Restarting...
+[2026-05-07 03:33:48] Restarting upload (LFS dedup will skip already uploaded chunks)...
+[2026-05-07 03:33:49] Restart issued, WMI ReturnValue=0, launcher PID=26872
+[2026-05-07 03:34:09] Tracking PID 1688, init Read=8.44 GB
+[2026-05-07 03:37:11] No upload python found. Restarting...
+[2026-05-07 03:37:11] Restarting upload (LFS dedup will skip already uploaded chunks)...
+[2026-05-07 03:37:11] Restart issued, WMI ReturnValue=0, launcher PID=25172
+[2026-05-07 03:37:31] Tracking PID 20240, init Read=10.62 GB
+[2026-05-07 03:40:34] No upload python found. Restarting...
+[2026-05-07 03:40:34] Restarting upload (LFS dedup will skip already uploaded chunks)...
+[2026-05-07 03:40:34] Restart issued, WMI ReturnValue=0, launcher PID=15440
+[2026-05-07 03:40:54] Tracking PID 7668, init Read=9.31 GB
+[2026-05-07 03:43:56] No upload python found. Restarting...
+[2026-05-07 03:43:56] Restarting upload (LFS dedup will skip already uploaded chunks)...
+[2026-05-07 03:43:56] Restart issued, WMI ReturnValue=0, launcher PID=12332
+[2026-05-07 03:44:17] Tracking PID 16364, init Read=9.01 GB
+[2026-05-07 03:47:19] No upload python found. Restarting...
+[2026-05-07 03:47:19] Restarting upload (LFS dedup will skip already uploaded chunks)...
+[2026-05-07 03:47:19] Restart issued, WMI ReturnValue=0, launcher PID=25116
+[2026-05-07 03:47:39] Tracking PID 21724, init Read=8.21 GB
+[2026-05-07 03:50:41] No upload python found. Restarting...
+[2026-05-07 03:50:41] Restarting upload (LFS dedup will skip already uploaded chunks)...
+[2026-05-07 03:50:42] Restart issued, WMI ReturnValue=0, launcher PID=29036
+[2026-05-07 03:51:02] Tracking PID 28372, init Read=8.72 GB
+[2026-05-07 03:54:04] No upload python found. Restarting...
+[2026-05-07 03:54:04] Restarting upload (LFS dedup will skip already uploaded chunks)...
+[2026-05-07 03:54:04] Restart issued, WMI ReturnValue=0, launcher PID=29684
+[2026-05-07 03:54:24] Tracking PID 20664, init Read=8.09 GB
+[2026-05-07 03:57:26] No upload python found. Restarting...
+[2026-05-07 03:57:26] Restarting upload (LFS dedup will skip already uploaded chunks)...
+[2026-05-07 03:57:26] Restart issued, WMI ReturnValue=0, launcher PID=25116
+[2026-05-07 03:57:47] Tracking PID 15052, init Read=8.79 GB
+[2026-05-07 04:00:49] No upload python found. Restarting...
+[2026-05-07 04:00:49] Restarting upload (LFS dedup will skip already uploaded chunks)...
+[2026-05-07 04:00:49] Restart issued, WMI ReturnValue=0, launcher PID=2668
+[2026-05-07 04:01:09] Tracking PID 8028, init Read=9.38 GB
+[2026-05-07 04:04:11] No upload python found. Restarting...
+[2026-05-07 04:04:11] Restarting upload (LFS dedup will skip already uploaded chunks)...
+[2026-05-07 04:04:11] Restart issued, WMI ReturnValue=0, launcher PID=4128
+[2026-05-07 04:04:31] Tracking PID 27280, init Read=9.46 GB
+[2026-05-07 04:08:04] No upload python found. Restarting...
+[2026-05-07 04:08:04] Restarting upload (LFS dedup will skip already uploaded chunks)...
+[2026-05-07 04:08:04] Restart issued, WMI ReturnValue=0, launcher PID=27408
+[2026-05-07 04:08:24] Tracking PID 29060, init Read=10.32 GB
+[2026-05-07 04:11:27] No upload python found. Restarting...
+[2026-05-07 04:11:27] Restarting upload (LFS dedup will skip already uploaded chunks)...
+[2026-05-07 04:11:27] Restart issued, WMI ReturnValue=0, launcher PID=18400
+[2026-05-07 04:11:47] Tracking PID 28568, init Read=9.09 GB
+[2026-05-07 04:14:49] No upload python found. Restarting...
+[2026-05-07 04:14:49] Restarting upload (LFS dedup will skip already uploaded chunks)...
+[2026-05-07 04:14:49] Restart issued, WMI ReturnValue=0, launcher PID=25660
+[2026-05-07 04:15:10] Tracking PID 7216, init Read=9.88 GB
+[2026-05-07 04:18:12] No upload python found. Restarting...
+[2026-05-07 04:18:12] Restarting upload (LFS dedup will skip already uploaded chunks)...
+[2026-05-07 04:18:12] Restart issued, WMI ReturnValue=0, launcher PID=27632
+[2026-05-07 04:18:32] Tracking PID 26584, init Read=8.21 GB
+[2026-05-07 04:21:34] No upload python found. Restarting...
+[2026-05-07 04:21:34] Restarting upload (LFS dedup will skip already uploaded chunks)...
+[2026-05-07 04:21:34] Restart issued, WMI ReturnValue=0, launcher PID=29684
+[2026-05-07 04:21:54] Tracking PID 1452, init Read=8.94 GB
+[2026-05-07 04:24:57] No upload python found. Restarting...
+[2026-05-07 04:24:57] Restarting upload (LFS dedup will skip already uploaded chunks)...
+[2026-05-07 04:24:57] Restart issued, WMI ReturnValue=0, launcher PID=23396
+[2026-05-07 04:25:17] Tracking PID 2080, init Read=9.61 GB
+[2026-05-07 04:28:19] No upload python found. Restarting...
+[2026-05-07 04:28:19] Restarting upload (LFS dedup will skip already uploaded chunks)...
+[2026-05-07 04:28:19] Restart issued, WMI ReturnValue=0, launcher PID=29288
+[2026-05-07 04:28:40] Tracking PID 12628, init Read=9.36 GB
+[2026-05-07 04:31:42] No upload python found. Restarting...
+[2026-05-07 04:31:42] Restarting upload (LFS dedup will skip already uploaded chunks)...
+[2026-05-07 04:31:42] Restart issued, WMI ReturnValue=0, launcher PID=29080
+[2026-05-07 04:32:02] Tracking PID 20776, init Read=8.87 GB
+[2026-05-07 04:35:04] No upload python found. Restarting...
+[2026-05-07 04:35:04] Restarting upload (LFS dedup will skip already uploaded chunks)...
+[2026-05-07 04:35:04] Restart issued, WMI ReturnValue=0, launcher PID=25012
+[2026-05-07 04:35:24] Tracking PID 23744, init Read=9.38 GB
+[2026-05-07 04:38:27] No upload python found. Restarting...
+[2026-05-07 04:38:27] Restarting upload (LFS dedup will skip already uploaded chunks)...
+[2026-05-07 04:38:27] Restart issued, WMI ReturnValue=0, launcher PID=8960
+[2026-05-07 04:38:47] Tracking PID 28516, init Read=8.45 GB
+[2026-05-07 04:42:19] No upload python found. Restarting...
+[2026-05-07 04:42:19] Restarting upload (LFS dedup will skip already uploaded chunks)...
+[2026-05-07 04:42:20] Restart issued, WMI ReturnValue=0, launcher PID=24896
+[2026-05-07 04:42:40] Tracking PID 20416, init Read=7.3 GB
+[2026-05-07 04:45:42] No upload python found. Restarting...
+[2026-05-07 04:45:42] Restarting upload (LFS dedup will skip already uploaded chunks)...
+[2026-05-07 04:45:42] Restart issued, WMI ReturnValue=0, launcher PID=16408
+[2026-05-07 04:46:02] Tracking PID 28992, init Read=9.8 GB
+[2026-05-07 04:49:05] No upload python found. Restarting...
+[2026-05-07 04:49:05] Restarting upload (LFS dedup will skip already uploaded chunks)...
+[2026-05-07 04:49:05] Restart issued, WMI ReturnValue=0, launcher PID=27912
+[2026-05-07 04:49:25] Tracking PID 960, init Read=9.27 GB
+[2026-05-07 04:52:27] No upload python found. Restarting...
+[2026-05-07 04:52:27] Restarting upload (LFS dedup will skip already uploaded chunks)...
+[2026-05-07 04:52:27] Restart issued, WMI ReturnValue=0, launcher PID=15432
+[2026-05-07 04:52:47] Tracking PID 24880, init Read=9.64 GB
+[2026-05-07 04:56:20] No upload python found. Restarting...
+[2026-05-07 04:56:20] Restarting upload (LFS dedup will skip already uploaded chunks)...
+[2026-05-07 04:56:20] Restart issued, WMI ReturnValue=0, launcher PID=3744
+[2026-05-07 04:56:41] Tracking PID 25356, init Read=8.27 GB
+[2026-05-07 04:59:43] No upload python found. Restarting...
+[2026-05-07 04:59:43] Restarting upload (LFS dedup will skip already uploaded chunks)...
+[2026-05-07 04:59:43] Restart issued, WMI ReturnValue=0, launcher PID=27888
+[2026-05-07 05:00:04] Tracking PID 27952, init Read=10.45 GB
+[2026-05-07 05:03:06] No upload python found. Restarting...
+[2026-05-07 05:03:06] Restarting upload (LFS dedup will skip already uploaded chunks)...
+[2026-05-07 05:03:06] Restart issued, WMI ReturnValue=0, launcher PID=20772
+[2026-05-07 05:03:26] Tracking PID 1456, init Read=10.27 GB
+[2026-05-07 05:06:29] No upload python found. Restarting...
+[2026-05-07 05:06:29] Restarting upload (LFS dedup will skip already uploaded chunks)...
+[2026-05-07 05:06:29] Restart issued, WMI ReturnValue=0, launcher PID=13848
+[2026-05-07 05:06:49] Tracking PID 28648, init Read=10.36 GB
+[2026-05-07 05:09:51] No upload python found. Restarting...
+[2026-05-07 05:09:51] Restarting upload (LFS dedup will skip already uploaded chunks)...
+[2026-05-07 05:09:52] Restart issued, WMI ReturnValue=0, launcher PID=6508
+[2026-05-07 05:10:12] Tracking PID 29120, init Read=9.57 GB
+[2026-05-07 05:13:14] No upload python found. Restarting...
+[2026-05-07 05:13:14] Restarting upload (LFS dedup will skip already uploaded chunks)...
+[2026-05-07 05:13:14] Restart issued, WMI ReturnValue=0, launcher PID=29080
+[2026-05-07 05:13:34] Tracking PID 29408, init Read=7.42 GB
+[2026-05-07 05:17:07] No upload python found. Restarting...
+[2026-05-07 05:17:07] Restarting upload (LFS dedup will skip already uploaded chunks)...
+[2026-05-07 05:17:07] Restart issued, WMI ReturnValue=0, launcher PID=5536
+[2026-05-07 05:17:27] Tracking PID 24176, init Read=9.01 GB
+[2026-05-07 05:20:29] No upload python found. Restarting...
+[2026-05-07 05:20:29] Restarting upload (LFS dedup will skip already uploaded chunks)...
+[2026-05-07 05:20:29] Restart issued, WMI ReturnValue=0, launcher PID=27784
+[2026-05-07 05:20:50] Tracking PID 27904, init Read=10.14 GB
+[2026-05-07 05:23:52] No upload python found. Restarting...
+[2026-05-07 05:23:52] Restarting upload (LFS dedup will skip already uploaded chunks)...
+[2026-05-07 05:23:52] Restart issued, WMI ReturnValue=0, launcher PID=3892
+[2026-05-07 05:24:12] Tracking PID 23124, init Read=8.03 GB
+[2026-05-07 05:27:14] No upload python found. Restarting...
+[2026-05-07 05:27:14] Restarting upload (LFS dedup will skip already uploaded chunks)...
+[2026-05-07 05:27:14] Restart issued, WMI ReturnValue=0, launcher PID=924
+[2026-05-07 05:27:35] Tracking PID 6124, init Read=8.05 GB
+[2026-05-07 05:30:37] No upload python found. Restarting...
+[2026-05-07 05:30:37] Restarting upload (LFS dedup will skip already uploaded chunks)...
+[2026-05-07 05:30:37] Restart issued, WMI ReturnValue=0, launcher PID=28232
+[2026-05-07 05:30:57] Tracking PID 1836, init Read=9.41 GB
+[2026-05-07 05:33:59] No upload python found. Restarting...
+[2026-05-07 05:33:59] Restarting upload (LFS dedup will skip already uploaded chunks)...
+[2026-05-07 05:33:59] Restart issued, WMI ReturnValue=0, launcher PID=29568
+[2026-05-07 05:34:20] Tracking PID 14728, init Read=8.76 GB
+[2026-05-07 05:37:22] No upload python found. Restarting...
+[2026-05-07 05:37:22] Restarting upload (LFS dedup will skip already uploaded chunks)...
+[2026-05-07 05:37:22] Restart issued, WMI ReturnValue=0, launcher PID=29036
+[2026-05-07 05:37:42] Tracking PID 21932, init Read=9.63 GB
+[2026-05-07 05:40:44] No upload python found. Restarting...
+[2026-05-07 05:40:44] Restarting upload (LFS dedup will skip already uploaded chunks)...
+[2026-05-07 05:40:44] Restart issued, WMI ReturnValue=0, launcher PID=5956
+[2026-05-07 05:41:05] Tracking PID 16784, init Read=10.13 GB
+[2026-05-07 05:44:07] No upload python found. Restarting...
+[2026-05-07 05:44:07] Restarting upload (LFS dedup will skip already uploaded chunks)...
+[2026-05-07 05:44:07] Restart issued, WMI ReturnValue=0, launcher PID=29208
+[2026-05-07 05:44:27] Tracking PID 26468, init Read=9.85 GB
+[2026-05-07 05:47:29] No upload python found. Restarting...
+[2026-05-07 05:47:29] Restarting upload (LFS dedup will skip already uploaded chunks)...
+[2026-05-07 05:47:30] Restart issued, WMI ReturnValue=0, launcher PID=17204
+[2026-05-07 05:47:50] Tracking PID 27924, init Read=9.21 GB
+[2026-05-07 05:50:52] No upload python found. Restarting...
+[2026-05-07 05:50:52] Restarting upload (LFS dedup will skip already uploaded chunks)...
+[2026-05-07 05:50:52] Restart issued, WMI ReturnValue=0, launcher PID=7704
+[2026-05-07 05:51:12] Tracking PID 25912, init Read=10.19 GB
+[2026-05-07 05:54:14] No upload python found. Restarting...
+[2026-05-07 05:54:14] Restarting upload (LFS dedup will skip already uploaded chunks)...
+[2026-05-07 05:54:14] Restart issued, WMI ReturnValue=0, launcher PID=28952
+[2026-05-07 05:54:35] Tracking PID 29272, init Read=8.83 GB
+[2026-05-07 05:57:37] No upload python found. Restarting...
+[2026-05-07 05:57:37] Restarting upload (LFS dedup will skip already uploaded chunks)...
+[2026-05-07 05:57:37] Restart issued, WMI ReturnValue=0, launcher PID=25524
+[2026-05-07 05:57:58] Tracking PID 8760, init Read=7.8 GB
+[2026-05-07 06:01:00] No upload python found. Restarting...
+[2026-05-07 06:01:00] Restarting upload (LFS dedup will skip already uploaded chunks)...
+[2026-05-07 06:01:00] Restart issued, WMI ReturnValue=0, launcher PID=29016
+[2026-05-07 06:01:20] Tracking PID 8040, init Read=9.98 GB
+[2026-05-07 06:04:22] No upload python found. Restarting...
+[2026-05-07 06:04:22] Restarting upload (LFS dedup will skip already uploaded chunks)...
+[2026-05-07 06:04:22] Restart issued, WMI ReturnValue=0, launcher PID=28840
+[2026-05-07 06:04:42] Tracking PID 25172, init Read=8.78 GB
+[2026-05-07 06:07:44] No upload python found. Restarting...
+[2026-05-07 06:07:44] Restarting upload (LFS dedup will skip already uploaded chunks)...
+[2026-05-07 06:07:44] Restart issued, WMI ReturnValue=0, launcher PID=14524
+[2026-05-07 06:08:05] Tracking PID 11872, init Read=8.56 GB
+[2026-05-07 06:11:07] No upload python found. Restarting...
+[2026-05-07 06:11:07] Restarting upload (LFS dedup will skip already uploaded chunks)...
+[2026-05-07 06:11:07] Restart issued, WMI ReturnValue=0, launcher PID=13756
+[2026-05-07 06:11:27] Tracking PID 15716, init Read=10.04 GB
+[2026-05-07 06:15:00] No upload python found. Restarting...
+[2026-05-07 06:15:00] Restarting upload (LFS dedup will skip already uploaded chunks)...
+[2026-05-07 06:15:00] Restart issued, WMI ReturnValue=0, launcher PID=28472
+[2026-05-07 06:15:20] Tracking PID 20180, init Read=7.92 GB
+[2026-05-07 06:18:22] No upload python found. Restarting...
+[2026-05-07 06:18:22] Restarting upload (LFS dedup will skip already uploaded chunks)...
+[2026-05-07 06:18:22] Restart issued, WMI ReturnValue=0, launcher PID=15712
+[2026-05-07 06:18:43] Tracking PID 12508, init Read=9.04 GB
+[2026-05-07 06:21:45] No upload python found. Restarting...
+[2026-05-07 06:21:45] Restarting upload (LFS dedup will skip already uploaded chunks)...
+[2026-05-07 06:21:45] Restart issued, WMI ReturnValue=0, launcher PID=22588
+[2026-05-07 06:22:06] Tracking PID 20564, init Read=8.24 GB
+[2026-05-07 06:25:07] No upload python found. Restarting...
+[2026-05-07 06:25:07] Restarting upload (LFS dedup will skip already uploaded chunks)...
+[2026-05-07 06:25:08] Restart issued, WMI ReturnValue=0, launcher PID=21216
+[2026-05-07 06:25:28] Tracking PID 27056, init Read=8.88 GB
+[2026-05-07 06:28:30] No upload python found. Restarting...
+[2026-05-07 06:28:30] Restarting upload (LFS dedup will skip already uploaded chunks)...
+[2026-05-07 06:28:30] Restart issued, WMI ReturnValue=0, launcher PID=15504
+[2026-05-07 06:28:50] Tracking PID 23240, init Read=8.62 GB
+[2026-05-07 06:31:53] No upload python found. Restarting...
+[2026-05-07 06:31:53] Restarting upload (LFS dedup will skip already uploaded chunks)...
+[2026-05-07 06:31:53] Restart issued, WMI ReturnValue=0, launcher PID=12632
+[2026-05-07 06:32:13] Tracking PID 29112, init Read=7.91 GB
+[2026-05-07 06:35:46] No upload python found. Restarting...
+[2026-05-07 06:35:46] Restarting upload (LFS dedup will skip already uploaded chunks)...
+[2026-05-07 06:35:46] Restart issued, WMI ReturnValue=0, launcher PID=6688
+[2026-05-07 06:36:06] Tracking PID 1880, init Read=9.38 GB
+[2026-05-07 06:39:08] No upload python found. Restarting...
+[2026-05-07 06:39:08] Restarting upload (LFS dedup will skip already uploaded chunks)...
+[2026-05-07 06:39:08] Restart issued, WMI ReturnValue=0, launcher PID=28860
+[2026-05-07 06:39:28] Tracking PID 20996, init Read=10.1 GB
+[2026-05-07 06:42:31] No upload python found. Restarting...
+[2026-05-07 06:42:31] Restarting upload (LFS dedup will skip already uploaded chunks)...
+[2026-05-07 06:42:31] Restart issued, WMI ReturnValue=0, launcher PID=12436
+[2026-05-07 06:42:51] Tracking PID 23428, init Read=8.12 GB
+[2026-05-07 06:45:53] No upload python found. Restarting...
+[2026-05-07 06:45:53] Restarting upload (LFS dedup will skip already uploaded chunks)...
+[2026-05-07 06:45:53] Restart issued, WMI ReturnValue=0, launcher PID=15440
+[2026-05-07 06:46:14] Tracking PID 26756, init Read=9.91 GB
+[2026-05-07 06:49:16] No upload python found. Restarting...
+[2026-05-07 06:49:16] Restarting upload (LFS dedup will skip already uploaded chunks)...
+[2026-05-07 06:49:16] Restart issued, WMI ReturnValue=0, launcher PID=28312
+[2026-05-07 06:49:36] Tracking PID 13260, init Read=8.84 GB
+[2026-05-07 06:53:09] No upload python found. Restarting...
+[2026-05-07 06:53:09] Restarting upload (LFS dedup will skip already uploaded chunks)...
+[2026-05-07 06:53:09] Restart issued, WMI ReturnValue=0, launcher PID=13476
+[2026-05-07 06:53:29] Tracking PID 18072, init Read=8.38 GB
+[2026-05-07 06:56:32] No upload python found. Restarting...
+[2026-05-07 06:56:32] Restarting upload (LFS dedup will skip already uploaded chunks)...
+[2026-05-07 06:56:32] Restart issued, WMI ReturnValue=0, launcher PID=17460
+[2026-05-07 06:56:52] Tracking PID 29056, init Read=8.07 GB
+[2026-05-07 07:00:25] No upload python found. Restarting...
+[2026-05-07 07:00:25] Restarting upload (LFS dedup will skip already uploaded chunks)...
+[2026-05-07 07:00:25] Restart issued, WMI ReturnValue=0, launcher PID=29528
+[2026-05-07 07:00:45] Tracking PID 21456, init Read=9.85 GB
+[2026-05-07 07:03:47] No upload python found. Restarting...
+[2026-05-07 07:03:47] Restarting upload (LFS dedup will skip already uploaded chunks)...
+[2026-05-07 07:03:47] Restart issued, WMI ReturnValue=0, launcher PID=24264
+[2026-05-07 07:04:08] Tracking PID 3440, init Read=9.81 GB
+[2026-05-07 07:07:10] No upload python found. Restarting...
+[2026-05-07 07:07:10] Restarting upload (LFS dedup will skip already uploaded chunks)...
+[2026-05-07 07:07:10] Restart issued, WMI ReturnValue=0, launcher PID=19964
+[2026-05-07 07:07:30] Tracking PID 29076, init Read=10.53 GB
+[2026-05-07 07:11:03] No upload python found. Restarting...
+[2026-05-07 07:11:03] Restarting upload (LFS dedup will skip already uploaded chunks)...
+[2026-05-07 07:11:03] Restart issued, WMI ReturnValue=0, launcher PID=5848
+[2026-05-07 07:11:23] Tracking PID 20508, init Read=9.59 GB
+[2026-05-07 07:14:26] No upload python found. Restarting...
+[2026-05-07 07:14:26] Restarting upload (LFS dedup will skip already uploaded chunks)...
+[2026-05-07 07:14:26] Restart issued, WMI ReturnValue=0, launcher PID=23236
+[2026-05-07 07:14:46] Tracking PID 2916, init Read=8.73 GB
+[2026-05-07 07:17:48] No upload python found. Restarting...
+[2026-05-07 07:17:48] Restarting upload (LFS dedup will skip already uploaded chunks)...
+[2026-05-07 07:17:48] Restart issued, WMI ReturnValue=0, launcher PID=29588
+[2026-05-07 07:18:09] Tracking PID 26428, init Read=9.4 GB
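The log records a supervisor loop: whenever no upload python process is found, it relaunches the uploader (on Windows, via WMI, with ReturnValue=0 meaning the launch succeeded) and about 20 s later starts tracking the new PID's cumulative read I/O against the 300 s stall threshold. A minimal cross-platform sketch of that loop, assuming `psutil` and a hypothetical `upload_checkpoints.py` script name (the real script is not shown in this diff):

```python
# Minimal sketch of the watchdog behaviour seen in .watchdog.log above,
# assuming psutil and a hypothetical upload script name. Illustrative only.
import subprocess
import time

import psutil

UPLOAD_CMD = ["python", "upload_checkpoints.py"]  # hypothetical script name
STALL_THRESHOLD_S = 300                           # matches the log header


def find_upload_proc():
    """Return the running upload process, or None if it is not found."""
    for p in psutil.process_iter(["pid", "cmdline"]):
        if "upload_checkpoints.py" in " ".join(p.info["cmdline"] or []):
            return p
    return None


def log(msg):
    print(f"[{time.strftime('%Y-%m-%d %H:%M:%S')}] {msg}")


log(f"Watchdog started, stall threshold = {STALL_THRESHOLD_S} s")
while True:
    proc = find_upload_proc()
    if proc is None:
        log("No upload python found. Restarting...")
        log("Restarting upload (LFS dedup will skip already uploaded chunks)...")
        launcher = subprocess.Popen(UPLOAD_CMD)
        log(f"Restart issued, launcher PID={launcher.pid}")
        time.sleep(20)  # the log shows ~20 s before tracking begins
        proc = find_upload_proc()
        if proc is not None:
            read_gb = proc.io_counters().read_bytes / 1e9
            log(f"Tracking PID {proc.pid}, init Read={read_gb:.2f} GB")
    time.sleep(STALL_THRESHOLD_S)
```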
checkpoints/GLM-4.6V-Flash-SFT/all_results.json
ADDED
@@ -0,0 +1,8 @@
+{
+  "epoch": 1.0,
+  "total_flos": 2477163648385024.0,
+  "train_loss": 0.20598802658081056,
+  "train_runtime": 35266.4791,
+  "train_samples_per_second": 5.671,
+  "train_steps_per_second": 0.089
+}
checkpoints/GLM-4.6V-Flash-SFT/chat_template.jinja
ADDED
@@ -0,0 +1,140 @@
+[gMASK]<sop>
+{%- if tools -%}
+<|system|>
+# Tools
+
+You may call one or more functions to assist with the user query.
+
+You are provided with function signatures within <tools></tools> XML tags:
+<tools>
+{% for tool in tools %}
+{{ tool | tojson(ensure_ascii=False) }}
+{% endfor %}
+</tools>
+
+For each function call, output the function name and arguments within the following XML format:
+<tool_call>{function-name}
+<arg_key>{arg-key-1}</arg_key>
+<arg_value>{arg-value-1}</arg_value>
+<arg_key>{arg-key-2}</arg_key>
+<arg_value>{arg-value-2}</arg_value>
+...
+</tool_call>{%- endif -%}
+{%- macro visible_text(content) -%}
+{%- if content is string -%}
+{{- content }}
+{%- elif content is iterable and content is not mapping -%}
+{%- for item in content -%}
+{%- if item is mapping and item.type == 'text' -%}
+{{- item.text }}
+{%- elif item is mapping and (item.type == 'image' or 'image' in item) -%}
+<|begin_of_image|><|image|><|end_of_image|>
+{%- elif item is mapping and (item.type == 'video' or 'video' in item) -%}
+<|begin_of_video|><|video|><|end_of_video|>
+{%- elif item is string -%}
+{{- item }}
+{%- endif -%}
+{%- endfor -%}
+{%- else -%}
+{{- content }}
+{%- endif -%}
+{%- endmacro -%}
+{%- set ns = namespace(last_user_index=-1) %}
+{%- for m in messages %}
+{%- if m.role == 'user' %}
+{% set ns.last_user_index = loop.index0 -%}
+{%- endif %}
+{%- endfor %}
+{% for m in messages %}
+{%- if m.role == 'user' -%}<|user|>
+{% if m.content is string %}
+{{ m.content }}
+{%- else %}
+{%- for item in m.content %}
+{% if item.type == 'video' or 'video' in item %}
+<|begin_of_video|><|video|><|end_of_video|>{% elif item.type == 'image' or 'image' in item %}
+<|begin_of_image|><|image|><|end_of_image|>{% elif item.type == 'text' %}
+{{ item.text }}
+{%- endif %}
+{%- endfor %}
+{%- endif %}
+{{- '/nothink' if (enable_thinking is defined and not enable_thinking and not visible_text(m.content).endswith("/nothink")) else '' -}}
+{%- elif m.role == 'assistant' -%}
+<|assistant|>
+{%- set reasoning_content = '' %}
+{%- set content = visible_text(m.content) %}
+{%- if m.reasoning_content is string %}
+{%- set reasoning_content = m.reasoning_content %}
+{%- else %}
+{%- if '</think>' in content %}
+{%- set reasoning_content = content.split('</think>')[0].rstrip('\n').split('<think>')[-1].lstrip('\n') %}
+{%- set content = content.split('</think>')[-1].lstrip('\n') %}
+{%- endif %}
+{%- endif %}
+{%- if loop.index0 > ns.last_user_index and reasoning_content -%}
+{{ '\n<think>' + reasoning_content.strip() + '</think>'}}
+{%- else -%}
+{{ '\n<think></think>' }}
+{%- endif -%}
+{%- if content.strip() -%}
+{{ '\n' + content.strip() }}
+{%- endif -%}
+{% if m.tool_calls %}
+{% for tc in m.tool_calls %}
+{%- if tc.function %}
+{%- set tc = tc.function %}
+{%- endif %}
+{{ '\n<tool_call>' + tc.name }}
+{% set _args = tc.arguments %}
+{% for k, v in _args.items() %}
+<arg_key>{{ k }}</arg_key>
+<arg_value>{{ v | tojson(ensure_ascii=False) if v is not string else v }}</arg_value>
+{% endfor %}
+</tool_call>{% endfor %}
+{% endif %}
+{%- elif m.role == 'tool' -%}
+{%- if m.content is string -%}
+{%- if loop.first or (messages[loop.index0 - 1].role != "tool") %}
+{{- '<|observation|>' }}
+{%- endif %}
+{{- '\n<tool_response>\n' }}
+{{- m.content }}
+{{- '\n</tool_response>' }}
+{% elif m.content is iterable and m.content is not mapping %}
+{%- if loop.first or (messages[loop.index0 - 1].role != "tool") %}
+{{- '<|observation|>' }}
+{%- endif %}
+{{- '\n<tool_response>\n' }}
+{%- for tr in m.content -%}
+{%- if tr is mapping and tr.type is defined -%}
+{%- set t = tr.type | lower -%}
+{%- if t == 'text' and tr.text is defined -%}
+{{ tr.text }}
+{%- elif t in ['image', 'image_url'] -%}
+<|begin_of_image|><|image|><|end_of_image|>
+{%- elif t in ['video', 'video_url'] -%}
+<|begin_of_video|><|video|><|end_of_video|>
+{%- else -%}
+{{ tr | tojson(ensure_ascii=False) }}
+{%- endif -%}
+{%- else -%}
+{{ tr.output if tr.output is defined else tr }}
+{%- endif -%}
+{%- endfor -%}
+{{- '\n</tool_response>' }}
+{%- else -%}
+<|observation|>{% for tr in m.content %}
+
+<tool_response>
+{{ tr.output if tr.output is defined else tr }}
+</tool_response>{% endfor -%}
+{% endif -%}
+{%- elif m.role == 'system' -%}
+<|system|>
+{{ visible_text(m.content) }}
+{%- endif -%}
+{%- endfor -%}
+{%- if add_generation_prompt -%}
+<|assistant|>
+{{'<think></think>\n' if (enable_thinking is defined and not enable_thinking) else ''}}
+{%- endif -%}
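This template handles the `[gMASK]<sop>` prefix, tool calls, interleaved image/video placeholders, and `<think>` reasoning segments; it is normally rendered by the processor rather than by hand. A minimal usage sketch against the checkpoint directory shipped in this PR (the message content is a made-up example):

```python
# Minimal sketch: let the processor render the chat_template.jinja above.
from transformers import AutoProcessor

processor = AutoProcessor.from_pretrained("checkpoints/GLM-4.6V-Flash-SFT")
messages = [
    {"role": "user", "content": [
        {"type": "image", "image": "frame_000.png"},   # placeholder path
        {"type": "text", "text": "Predict the next five waypoints."},
    ]},
]
prompt = processor.apply_chat_template(
    messages, add_generation_prompt=True, tokenize=False
)
print(prompt)  # starts with "[gMASK]<sop>" and ends with "<|assistant|>"
```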
checkpoints/GLM-4.6V-Flash-SFT/config.json
ADDED
@@ -0,0 +1,72 @@
+{
+  "architectures": [
+    "Glm4vForConditionalGeneration"
+  ],
+  "dtype": "bfloat16",
+  "eos_token_id": 151329,
+  "hidden_size": 4096,
+  "image_end_token_id": 151340,
+  "image_start_token_id": 151339,
+  "image_token_id": 151363,
+  "model_type": "glm4v",
+  "pad_token_id": 151329,
+  "text_config": {
+    "attention_bias": true,
+    "attention_dropout": 0.0,
+    "dtype": "bfloat16",
+    "eos_token_id": [
+      151329,
+      151336,
+      151338
+    ],
+    "hidden_act": "silu",
+    "hidden_size": 4096,
+    "initializer_range": 0.02,
+    "intermediate_size": 13696,
+    "max_position_embeddings": 131072,
+    "model_type": "glm4v_text",
+    "num_attention_heads": 32,
+    "num_hidden_layers": 40,
+    "num_key_value_heads": 2,
+    "pad_token_id": 151329,
+    "rms_norm_eps": 1e-05,
+    "rope_parameters": {
+      "mrope_section": [
+        8,
+        12,
+        12
+      ],
+      "partial_rotary_factor": 0.5,
+      "rope_theta": 500000,
+      "rope_type": "default"
+    },
+    "use_cache": false,
+    "vocab_size": 151552
+  },
+  "tie_word_embeddings": false,
+  "transformers_version": "5.5.3",
+  "use_cache": false,
+  "video_end_token_id": 151342,
+  "video_start_token_id": 151341,
+  "video_token_id": 151364,
+  "vision_config": {
+    "attention_bias": false,
+    "attention_dropout": 0.0,
+    "depth": 24,
+    "dtype": "bfloat16",
+    "hidden_act": "silu",
+    "hidden_dropout_prob": 0.0,
+    "hidden_size": 1536,
+    "image_size": 336,
+    "in_channels": 3,
+    "initializer_range": 0.02,
+    "intermediate_size": 13696,
+    "model_type": "glm4v_vision",
+    "num_heads": 12,
+    "out_hidden_size": 4096,
+    "patch_size": 14,
+    "rms_norm_eps": 1e-05,
+    "spatial_merge_size": 2,
+    "temporal_patch_size": 2
+  }
+}
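A quick size sanity check against this config, assuming every tensor in the single-file checkpoint is stored in bfloat16 (2 bytes per parameter, as `"dtype": "bfloat16"` suggests):

```python
# Rough sanity check on the config vs. the model.safetensors LFS pointer below.
safetensors_bytes = 20_585_645_128   # "size" field of the LFS pointer
params = safetensors_bytes / 2       # bfloat16 = 2 bytes per parameter
print(f"~{params / 1e9:.1f}B parameters")  # ~10.3B (text + vision tower)

vocab, hidden = 151_552, 4_096       # from the text_config above
embed = vocab * hidden               # input embeddings alone
print(f"embeddings: {embed / 1e9:.2f}B (x2: lm_head is untied here)")
```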
checkpoints/GLM-4.6V-Flash-SFT/eval_results_job_glm_glm_46v_flash_20260430_010119.json
ADDED
@@ -0,0 +1,56 @@
+{
+  "mae_dx": 0.1517896551724138,
+  "rmse_dx": 0.5050280292665226,
+  "mae_dy": 0.13570689655172413,
+  "rmse_dy": 0.40379185488190017,
+  "mae_dz": 0.017967241379310345,
+  "rmse_dz": 0.15680698656144998,
+  "mae_dpitch": 0.24627758620689652,
+  "rmse_dpitch": 0.5965444891927231,
+  "mae_dyaw": 1.0261448275862068,
+  "rmse_dyaw": 2.459724339755617,
+  "mae_droll": 0.0,
+  "rmse_droll": 0.0,
+  "mae_overall": 0.26298103448275856,
+  "mae_position": 0.10182126436781609,
+  "mae_rotation": 0.42414080459770115,
+  "rmse_overall": 1.068394337204253,
+  "wp1_euc_mae": 0.0698010264307822,
+  "wp1_euc_median": 0.01999999999999999,
+  "wp2_euc_mae": 0.1401695004658457,
+  "wp2_euc_median": 0.04123105625617661,
+  "wp3_euc_mae": 0.22301934350856006,
+  "wp3_euc_median": 0.07211102550927984,
+  "wp4_euc_mae": 0.32865394783587415,
+  "wp4_euc_median": 0.1104536101718727,
+  "wp5_euc_mae": 0.44338792793915116,
+  "wp5_euc_median": 0.15905694150420963,
+  "euclidean_mae": 0.24100634923604267,
+  "ADE": 0.24100634923604267,
+  "FDE": 0.44338792793915116,
+  "ADE_median": 0.08327688731593763,
+  "FDE_median": 0.15905694150420963,
+  "SR@0.5m": 0.8951724137931034,
+  "SR@1.0m": 0.9513793103448276,
+  "SR@2.0m": 0.9808620689655172,
+  "SR@5.0m": 0.9968965517241379,
+  "TrajSR@1.0m": 0.8974137931034483,
+  "TrajSR@2.0m": 0.9577586206896552,
+  "TrajSR@5.0m": 0.9922413793103448,
+  "RotAcc@1.0deg": 0.7027586206896552,
+  "RotAcc@5.0deg": 0.9586206896551724,
+  "RotAcc@10.0deg": 0.9889655172413793,
+  "wp1_rot_mae": 0.5029051706109685,
+  "wp2_rot_mae": 0.7513635215329055,
+  "wp3_rot_mae": 1.0546360645612183,
+  "wp4_rot_mae": 1.4243170022546052,
+  "wp5_rot_mae": 1.784744600833039,
+  "rotation_euc_mae": 1.1035932719585473,
+  "parse_failure_rate": 0.0,
+  "parse_success_rate": 1.0,
+  "valid_samples": 1160,
+  "total_samples": 1160,
+  "parse_failures": 0,
+  "inference_engine": "vllm",
+  "vllm_version": "0.19.0"
+}
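These trajectory metrics follow the usual conventions: in this file ADE equals `euclidean_mae` (mean Euclidean error over all five waypoints) and FDE equals `wp5_euc_mae` (final waypoint only). A minimal sketch of how the aggregates relate, assuming `pred`/`gt` arrays of shape (N, 5, 3) and assuming SR thresholds apply to the final waypoint while TrajSR requires every waypoint within the threshold (the exact definitions are not shown in this diff):

```python
# Minimal sketch of the aggregation behind the eval metrics above; the SR
# and TrajSR definitions are assumptions, not taken from the eval script.
import numpy as np

def trajectory_metrics(pred: np.ndarray, gt: np.ndarray) -> dict:
    err = np.linalg.norm(pred - gt, axis=-1)          # (N, 5) waypoint errors
    out = {f"wp{k + 1}_euc_mae": err[:, k].mean() for k in range(err.shape[1])}
    out["ADE"] = err.mean()                            # mean over all waypoints
    out["FDE"] = err[:, -1].mean()                     # final waypoint only
    for t in (0.5, 1.0, 2.0, 5.0):
        out[f"SR@{t}m"] = (err[:, -1] <= t).mean()         # assumed definition
        out[f"TrajSR@{t}m"] = (err.max(axis=1) <= t).mean()  # assumed definition
    return out
```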
checkpoints/GLM-4.6V-Flash-SFT/generation_config.json
ADDED
@@ -0,0 +1,16 @@
+{
+  "_from_model_config": true,
+  "do_sample": true,
+  "eos_token_id": [
+    151329,
+    151329,
+    151336,
+    151338,
+    151348
+  ],
+  "pad_token_id": 151329,
+  "temperature": 0.8,
+  "top_k": 2,
+  "top_p": 0.6,
+  "transformers_version": "5.5.3"
+}
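These sampling defaults (temperature 0.8, top_k 2, top_p 0.6) are picked up automatically by `generate` when loading the checkpoint, but they can also be passed explicitly. A minimal sketch, assuming a recent transformers with `AutoModelForImageTextToText` for this architecture (adjust the class to your version; the text-only call is illustrative):

```python
# Minimal sketch: the generation_config above is loaded by from_pretrained,
# but the same sampling setup can be passed to generate() explicitly.
import torch
from transformers import AutoModelForImageTextToText, AutoProcessor

ckpt = "checkpoints/GLM-4.6V-Flash-SFT"
processor = AutoProcessor.from_pretrained(ckpt)
model = AutoModelForImageTextToText.from_pretrained(ckpt, torch_dtype=torch.bfloat16)

inputs = processor(text="[gMASK]<sop><|user|>\nhello<|assistant|>", return_tensors="pt")
out = model.generate(**inputs, do_sample=True, temperature=0.8, top_k=2, top_p=0.6)
print(processor.decode(out[0], skip_special_tokens=True))
```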
checkpoints/GLM-4.6V-Flash-SFT/model.safetensors
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c8a32229e6fe30d156e4259207d341d5b0022d08d8df59cd08760bf85cd5d215
+size 20585645128
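The three lines above are a Git LFS pointer, not the weights: the real ~20.6 GB blob is fetched on checkout. A minimal sketch for verifying a downloaded blob against such a pointer, using only the standard library:

```python
# Minimal sketch: verify a downloaded LFS object against its pointer file.
import hashlib
from pathlib import Path

def verify_lfs(pointer_path: str, blob_path: str) -> bool:
    fields = dict(
        line.split(" ", 1)
        for line in Path(pointer_path).read_text().splitlines()
        if line
    )
    oid = fields["oid"].split(":", 1)[1]   # "sha256:<hex>" -> "<hex>"
    size = int(fields["size"])
    blob = Path(blob_path)
    if blob.stat().st_size != size:
        return False
    h = hashlib.sha256()
    with blob.open("rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):
            h.update(chunk)
    return h.hexdigest() == oid
```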
checkpoints/GLM-4.6V-Flash-SFT/processor_config.json
ADDED
@@ -0,0 +1,63 @@
+{
+  "image_processor": {
+    "do_convert_rgb": true,
+    "do_normalize": true,
+    "do_rescale": true,
+    "do_resize": true,
+    "image_mean": [
+      0.48145466,
+      0.4578275,
+      0.40821073
+    ],
+    "image_processor_type": "Glm46VImageProcessor",
+    "image_std": [
+      0.26862954,
+      0.26130258,
+      0.27577711
+    ],
+    "merge_size": 2,
+    "patch_size": 14,
+    "resample": 3,
+    "rescale_factor": 0.00392156862745098,
+    "size": {
+      "longest_edge": 9633792,
+      "shortest_edge": 12544
+    },
+    "temporal_patch_size": 2
+  },
+  "processor_class": "Glm46VProcessor",
+  "video_processor": {
+    "do_convert_rgb": true,
+    "do_normalize": true,
+    "do_rescale": true,
+    "do_resize": true,
+    "do_sample_frames": true,
+    "fps": 2,
+    "image_mean": [
+      0.48145466,
+      0.4578275,
+      0.40821073
+    ],
+    "image_std": [
+      0.26862954,
+      0.26130258,
+      0.27577711
+    ],
+    "max_duration": 300,
+    "max_image_size": {
+      "longest_edge": 47040000
+    },
+    "merge_size": 2,
+    "num_frames": 16,
+    "patch_size": 14,
+    "resample": 3,
+    "rescale_factor": 0.00392156862745098,
+    "size": {
+      "longest_edge": 100352000,
+      "shortest_edge": 12544
+    },
+    "temporal_patch_size": 2
+  }
+}
+    "video_processor_type": "Glm46VVideoProcessor" appears inside the video_processor block above; the full object closes with the two braces shown.
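Note that the `size` limits are pixel areas, not edge lengths: with `patch_size` 14 and `merge_size` 2, one visual token covers a 28x28 pixel block (784 px), so `shortest_edge` 12544 and `longest_edge` 9633792 bound each image to between 16 and 12288 tokens. A worked check:

```python
# Worked check of the image-size limits in the processor_config above.
patch, merge = 14, 2
px_per_token = (patch * merge) ** 2          # 784 pixels per merged token
min_area, max_area = 12_544, 9_633_792      # shortest_edge / longest_edge
print(min_area // px_per_token, max_area // px_per_token)  # 16 ... 12288 tokens
```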
checkpoints/GLM-4.6V-Flash-SFT/tokenizer.json
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:eecde1f225a86abef606164ceeb446737e592c4e7a40afe5cbf3ce8328e3df99
+size 19970886
checkpoints/GLM-4.6V-Flash-SFT/tokenizer_config.json
ADDED
@@ -0,0 +1,19 @@
+{
+  "backend": "tokenizers",
+  "clean_up_tokenization_spaces": false,
+  "do_lower_case": false,
+  "eos_token": "<|endoftext|>",
+  "extra_special_tokens": [
+    "<|user|>",
+    "<|observation|>",
+    "</answer>"
+  ],
+  "is_local": true,
+  "model_max_length": 128000,
+  "pad_token": "<|endoftext|>",
+  "padding_side": "right",
+  "processor_class": "Glm46VProcessor",
+  "remove_space": false,
+  "split_special_tokens": false,
+  "tokenizer_class": "TokenizersBackend"
+}
checkpoints/GLM-4.6V-Flash-SFT/train_results.json
ADDED
@@ -0,0 +1,8 @@
+{
+  "epoch": 1.0,
+  "total_flos": 2477163648385024.0,
+  "train_loss": 0.20598802658081056,
+  "train_runtime": 35266.4791,
+  "train_samples_per_second": 5.671,
+  "train_steps_per_second": 0.089
+}
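The throughput numbers are internally consistent: 5.671 samples/s over 35 266 s is about 200 000 training samples, and 0.089 steps/s over the same runtime matches the 3125 optimizer steps recorded in trainer_state.json below, i.e. an effective batch size of about 64. A quick check:

```python
# Quick consistency check on the training summary above.
runtime = 35266.4791
samples = 5.671 * runtime   # ~200_000 training samples in one epoch
steps = 0.089 * runtime     # ~3_139, matching global_step=3125 (rates rounded)
print(round(samples), round(steps), round(samples / 3125))  # ~200k, ~3139, 64
```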
checkpoints/GLM-4.6V-Flash-SFT/trainer_state.json
ADDED
|
@@ -0,0 +1,2227 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
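The file body below is a standard Hugging Face Trainer state dump: a few run-level fields (`global_step`, `epoch`, `eval_steps`, plus the best-checkpoint slots, all `null` here), followed by a `log_history` array with one record every 10 optimizer steps carrying `loss`, `learning_rate`, and `grad_norm`. The learning rate warms up to roughly 5e-06 by about step 320 and then decays, consistent with a warmup-plus-cosine schedule, while the logged loss falls from about 0.75 to about 0.18. As a minimal sketch (not one of this PR's scripts; the file path is illustrative), such a state file can be summarized like this:

import json

# Minimal sketch for reading a Trainer state file like the one below.
# Assumption: the standard trainer_state.json layout shown here; substitute
# the path of whichever checkpoint's state file you want to inspect.
with open("trainer_state.json") as f:
    state = json.load(f)

# Keep only training-log records (evaluation records typically carry
# "eval_loss" rather than "loss").
logs = [e for e in state["log_history"] if "loss" in e]
steps = [e["step"] for e in logs]
losses = [e["loss"] for e in logs]

print(f"global_step={state['global_step']}  epoch={state['epoch']}")
print(f"loss {losses[0]:.3f} @ step {steps[0]}  ->  {losses[-1]:.3f} @ step {steps[-1]}")

The same `steps`/`losses` pairs are all that is needed to reproduce a training-loss curve with any plotting library.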
{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 1.0,
  "eval_steps": 500,
  "global_step": 3125,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {"epoch": 0.0032, "grad_norm": 20.093808181688754, "learning_rate": 1.437699680511182e-07, "loss": 0.7523126602172852, "step": 10},
    {"epoch": 0.0064, "grad_norm": 16.520568445399164, "learning_rate": 3.0351437699680514e-07, "loss": 0.684361743927002, "step": 20},
    {"epoch": 0.0096, "grad_norm": 7.062991511064744, "learning_rate": 4.6325878594249205e-07, "loss": 0.46736898422241213, "step": 30},
    {"epoch": 0.0128, "grad_norm": 1.0572338350229438, "learning_rate": 6.230031948881789e-07, "loss": 0.3222517013549805, "step": 40},
    {"epoch": 0.016, "grad_norm": 0.768970780796944, "learning_rate": 7.82747603833866e-07, "loss": 0.29146518707275393, "step": 50},
    {"epoch": 0.0192, "grad_norm": 0.8158618748659492, "learning_rate": 9.424920127795528e-07, "loss": 0.28341834545135497, "step": 60},
    {"epoch": 0.0224, "grad_norm": 0.7218086220464439, "learning_rate": 1.1022364217252397e-06, "loss": 0.2903137683868408, "step": 70},
    {"epoch": 0.0256, "grad_norm": 0.7459109221323802, "learning_rate": 1.2619808306709266e-06, "loss": 0.2718811988830566, "step": 80},
    {"epoch": 0.0288, "grad_norm": 0.7186860317140319, "learning_rate": 1.4217252396166134e-06, "loss": 0.2660067558288574, "step": 90},
    {"epoch": 0.032, "grad_norm": 0.765918500231858, "learning_rate": 1.5814696485623005e-06, "loss": 0.26980152130126955, "step": 100},
    {"epoch": 0.0352, "grad_norm": 0.7344200083929374, "learning_rate": 1.7412140575079875e-06, "loss": 0.2695180416107178, "step": 110},
    {"epoch": 0.0384, "grad_norm": 0.7057487416602337, "learning_rate": 1.9009584664536742e-06, "loss": 0.2582674264907837, "step": 120},
    {"epoch": 0.0416, "grad_norm": 0.6996888798419932, "learning_rate": 2.060702875399361e-06, "loss": 0.2612154960632324, "step": 130},
    {"epoch": 0.0448, "grad_norm": 0.7150606291134206, "learning_rate": 2.220447284345048e-06, "loss": 0.2520437717437744, "step": 140},
    {"epoch": 0.048, "grad_norm": 0.7697242977250355, "learning_rate": 2.380191693290735e-06, "loss": 0.2501786470413208, "step": 150},
    {"epoch": 0.0512, "grad_norm": 0.6327215717833664, "learning_rate": 2.539936102236422e-06, "loss": 0.24434318542480468, "step": 160},
    {"epoch": 0.0544, "grad_norm": 0.7947096523807732, "learning_rate": 2.699680511182109e-06, "loss": 0.25281600952148436, "step": 170},
    {"epoch": 0.0576, "grad_norm": 0.6717890611061146, "learning_rate": 2.8594249201277955e-06, "loss": 0.2454531669616699, "step": 180},
    {"epoch": 0.0608, "grad_norm": 0.7151585341922304, "learning_rate": 3.0191693290734825e-06, "loss": 0.2505363464355469, "step": 190},
    {"epoch": 0.064, "grad_norm": 0.8601334705182279, "learning_rate": 3.17891373801917e-06, "loss": 0.2505714178085327, "step": 200},
    {"epoch": 0.0672, "grad_norm": 0.6106680426063227, "learning_rate": 3.3386581469648564e-06, "loss": 0.24775364398956298, "step": 210},
    {"epoch": 0.0704, "grad_norm": 0.6262984320818072, "learning_rate": 3.4984025559105434e-06, "loss": 0.24066565036773682, "step": 220},
    {"epoch": 0.0736, "grad_norm": 0.6078537303186395, "learning_rate": 3.6581469648562303e-06, "loss": 0.24378209114074706, "step": 230},
    {"epoch": 0.0768, "grad_norm": 0.5889510426869463, "learning_rate": 3.817891373801918e-06, "loss": 0.23820171356201172, "step": 240},
    {"epoch": 0.08, "grad_norm": 0.5658292689427505, "learning_rate": 3.977635782747604e-06, "loss": 0.23654117584228515, "step": 250},
    {"epoch": 0.0832, "grad_norm": 0.5757166706348428, "learning_rate": 4.137380191693291e-06, "loss": 0.23743386268615724, "step": 260},
    {"epoch": 0.0864, "grad_norm": 0.5807034355359694, "learning_rate": 4.297124600638978e-06, "loss": 0.23970918655395507, "step": 270},
    {"epoch": 0.0896, "grad_norm": 0.5634022487351626, "learning_rate": 4.456869009584665e-06, "loss": 0.23490209579467775, "step": 280},
    {"epoch": 0.0928, "grad_norm": 0.5520223075835592, "learning_rate": 4.616613418530352e-06, "loss": 0.2404552936553955, "step": 290},
    {"epoch": 0.096, "grad_norm": 0.5587222430473198, "learning_rate": 4.776357827476039e-06, "loss": 0.24298410415649413, "step": 300},
    {"epoch": 0.0992, "grad_norm": 0.542281258937415, "learning_rate": 4.936102236421725e-06, "loss": 0.22964231967926024, "step": 310},
    {"epoch": 0.1024, "grad_norm": 0.6339707011249724, "learning_rate": 4.999943833158769e-06, "loss": 0.22938170433044433, "step": 320},
    {"epoch": 0.1056, "grad_norm": 0.5290859105179109, "learning_rate": 4.999600600490783e-06, "loss": 0.23717782497406006, "step": 330},
    {"epoch": 0.1088, "grad_norm": 0.574404257271199, "learning_rate": 4.9989453817439345e-06, "loss": 0.23035426139831544, "step": 340},
    {"epoch": 0.112, "grad_norm": 0.5887719210155044, "learning_rate": 4.997978258698942e-06, "loss": 0.230421781539917, "step": 350},
    {"epoch": 0.1152, "grad_norm": 0.5618660264892863, "learning_rate": 4.996699352066659e-06, "loss": 0.23192777633666992, "step": 360},
    {"epoch": 0.1184, "grad_norm": 0.589113954603133, "learning_rate": 4.995108821473014e-06, "loss": 0.23194873332977295, "step": 370},
    {"epoch": 0.1216, "grad_norm": 0.552581223712263, "learning_rate": 4.993206865439084e-06, "loss": 0.22629022598266602, "step": 380},
    {"epoch": 0.1248, "grad_norm": 0.5506631212695152, "learning_rate": 4.990993721356317e-06, "loss": 0.22567858695983886, "step": 390},
    {"epoch": 0.128, "grad_norm": 0.5210832665844604, "learning_rate": 4.988469665456901e-06, "loss": 0.22596418857574463, "step": 400},
    {"epoch": 0.1312, "grad_norm": 0.5132503738005023, "learning_rate": 4.985635012779288e-06, "loss": 0.23435051441192628, "step": 410},
    {"epoch": 0.1344, "grad_norm": 0.5264119522984109, "learning_rate": 4.98249011712887e-06, "loss": 0.2258882999420166, "step": 420},
    {"epoch": 0.1376, "grad_norm": 0.5122311697688684, "learning_rate": 4.979035371033824e-06, "loss": 0.22527906894683838, "step": 430},
    {"epoch": 0.1408, "grad_norm": 0.5105227090020142, "learning_rate": 4.975271205696115e-06, "loss": 0.2246992588043213, "step": 440},
    {"epoch": 0.144, "grad_norm": 0.5307268054645026, "learning_rate": 4.971198090937671e-06, "loss": 0.2193459987640381, "step": 450},
    {"epoch": 0.1472, "grad_norm": 0.46923570087876276, "learning_rate": 4.966816535141756e-06, "loss": 0.21553544998168944, "step": 460},
    {"epoch": 0.1504, "grad_norm": 0.4881836025298746, "learning_rate": 4.9621270851895035e-06, "loss": 0.22505784034729004, "step": 470},
    {"epoch": 0.1536, "grad_norm": 0.50506411723612, "learning_rate": 4.957130326391662e-06, "loss": 0.22673957347869872, "step": 480},
    {"epoch": 0.1568, "grad_norm": 0.5086993434891525, "learning_rate": 4.951826882415544e-06, "loss": 0.22294471263885499, "step": 490},
    {"epoch": 0.16, "grad_norm": 0.5280465251135189, "learning_rate": 4.946217415207177e-06, "loss": 0.21789300441741943, "step": 500},
    {"epoch": 0.1632, "grad_norm": 0.5337843871964275, "learning_rate": 4.940302624908689e-06, "loss": 0.22192811965942383, "step": 510},
    {"epoch": 0.1664, "grad_norm": 0.4884343559217744, "learning_rate": 4.934083249770912e-06, "loss": 0.21614904403686525, "step": 520},
    {"epoch": 0.1696, "grad_norm": 0.5316592538281818, "learning_rate": 4.927560066061251e-06, "loss": 0.21973915100097657, "step": 530},
    {"epoch": 0.1728, "grad_norm": 0.518761429695226, "learning_rate": 4.920733887966783e-06, "loss": 0.23207192420959472, "step": 540},
    {"epoch": 0.176, "grad_norm": 0.511452747175852, "learning_rate": 4.913605567492636e-06, "loss": 0.21878607273101808, "step": 550},
    {"epoch": 0.1792, "grad_norm": 0.49924599926539726, "learning_rate": 4.906175994355656e-06, "loss": 0.22075920104980468, "step": 560},
    {"epoch": 0.1824, "grad_norm": 0.5259698850641532, "learning_rate": 4.898446095873345e-06, "loss": 0.22276382446289061, "step": 570},
    {"epoch": 0.1856, "grad_norm": 0.501751014152873, "learning_rate": 4.890416836848128e-06, "loss": 0.21954989433288574, "step": 580},
    {"epoch": 0.1888, "grad_norm": 0.5167201593356286, "learning_rate": 4.882089219446925e-06, "loss": 0.2145029067993164, "step": 590},
    {"epoch": 0.192, "grad_norm": 0.5006060240232905, "learning_rate": 4.873464283076074e-06, "loss": 0.22003324031829835, "step": 600},
    {"epoch": 0.1952, "grad_norm": 0.4477538874438277, "learning_rate": 4.864543104251587e-06, "loss": 0.21916275024414061, "step": 610},
    {"epoch": 0.1984, "grad_norm": 0.4832933241270485, "learning_rate": 4.855326796464798e-06, "loss": 0.2203526973724365, "step": 620},
    {"epoch": 0.2016, "grad_norm": 0.5359361967005408, "learning_rate": 4.8458165100433725e-06, "loss": 0.21596732139587402, "step": 630},
    {"epoch": 0.2048, "grad_norm": 0.5708003689943741, "learning_rate": 4.836013432007738e-06, "loss": 0.2171140193939209, "step": 640},
    {"epoch": 0.208, "grad_norm": 0.4831169531465719, "learning_rate": 4.825918785922921e-06, "loss": 0.22040581703186035, "step": 650},
    {"epoch": 0.2112, "grad_norm": 0.4982382400104379, "learning_rate": 4.8155338317458315e-06, "loss": 0.21841506958007811, "step": 660},
    {"epoch": 0.2144, "grad_norm": 0.4741071764041748, "learning_rate": 4.804859865668002e-06, "loss": 0.2143453598022461, "step": 670},
    {"epoch": 0.2176, "grad_norm": 0.47853550451884025, "learning_rate": 4.793898219953804e-06, "loss": 0.21545085906982422, "step": 680},
    {"epoch": 0.2208, "grad_norm": 0.4902247743421047, "learning_rate": 4.782650262774164e-06, "loss": 0.2166231393814087, "step": 690},
    {"epoch": 0.224, "grad_norm": 0.4611717059287351, "learning_rate": 4.7711173980357886e-06, "loss": 0.21284222602844238, "step": 700},
    {"epoch": 0.2272, "grad_norm": 0.4815654128340087, "learning_rate": 4.759301065205947e-06, "loss": 0.21358721256256102, "step": 710},
    {"epoch": 0.2304, "grad_norm": 0.5049245613626656, "learning_rate": 4.7472027391328e-06, "loss": 0.21447527408599854, "step": 720},
    {"epoch": 0.2336, "grad_norm": 0.4758997167389971, "learning_rate": 4.734823929861317e-06, "loss": 0.21809780597686768, "step": 730},
    {"epoch": 0.2368, "grad_norm": 0.5423173365143716, "learning_rate": 4.722166182444801e-06, "loss": 0.21390962600708008, "step": 740},
    {"epoch": 0.24, "grad_norm": 0.44572231492476455, "learning_rate": 4.709231076752045e-06, "loss": 0.21404554843902587, "step": 750},
    {"epoch": 0.2432, "grad_norm": 0.4848421373802031, "learning_rate": 4.696020227270142e-06, "loss": 0.21710457801818847, "step": 760},
    {"epoch": 0.2464, "grad_norm": 0.518532765750562, "learning_rate": 4.6825352829029705e-06, "loss": 0.21285481452941896, "step": 770},
    {"epoch": 0.2496, "grad_norm": 0.5008678397970389, "learning_rate": 4.668777926765392e-06, "loss": 0.21155524253845215, "step": 780},
    {"epoch": 0.2528, "grad_norm": 0.48720974823345864, "learning_rate": 4.6547498759731725e-06, "loss": 0.20655455589294433, "step": 790},
    {"epoch": 0.256, "grad_norm": 0.49528977499161353, "learning_rate": 4.6404528814286575e-06, "loss": 0.2101435422897339, "step": 800},
    {"epoch": 0.2592, "grad_norm": 0.4532686250809506, "learning_rate": 4.6258887276022425e-06, "loss": 0.21684365272521972, "step": 810},
    {"epoch": 0.2624, "grad_norm": 0.49803115837380546, "learning_rate": 4.611059232309639e-06, "loss": 0.21193151473999022, "step": 820},
    {"epoch": 0.2656, "grad_norm": 0.5153783225404047, "learning_rate": 4.595966246484986e-06, "loss": 0.21344296932220458, "step": 830},
    {"epoch": 0.2688, "grad_norm": 0.4765272009238815, "learning_rate": 4.580611653949829e-06, "loss": 0.21319386959075928, "step": 840},
    {"epoch": 0.272, "grad_norm": 0.5228745905777464, "learning_rate": 4.564997371177992e-06, "loss": 0.21112470626831054, "step": 850},
    {"epoch": 0.2752, "grad_norm": 0.4583805155148445, "learning_rate": 4.54912534705637e-06, "loss": 0.2108391284942627, "step": 860},
    {"epoch": 0.2784, "grad_norm": 0.4920259584441244, "learning_rate": 4.532997562641683e-06, "loss": 0.20768051147460936, "step": 870},
    {"epoch": 0.2816, "grad_norm": 0.5200095181799963, "learning_rate": 4.516616030913214e-06, "loss": 0.21211957931518555, "step": 880},
    {"epoch": 0.2848, "grad_norm": 0.4788503683270311, "learning_rate": 4.499982796521556e-06, "loss": 0.20693025588989258, "step": 890},
    {"epoch": 0.288, "grad_norm": 0.4666456137071941, "learning_rate": 4.48309993553341e-06, "loss": 0.20890872478485106, "step": 900},
    {"epoch": 0.2912, "grad_norm": 0.4794527139448749, "learning_rate": 4.465969555172468e-06, "loss": 0.20777955055236816, "step": 910},
    {"epoch": 0.2944, "grad_norm": 0.4616610840587355, "learning_rate": 4.448593793556391e-06, "loss": 0.21416122913360597, "step": 920},
    {"epoch": 0.2976, "grad_norm": 0.47725407011391663, "learning_rate": 4.430974819429954e-06, "loss": 0.20783448219299316, "step": 930},
    {"epoch": 0.3008, "grad_norm": 0.4596350013424985, "learning_rate": 4.413114831894344e-06, "loss": 0.20199823379516602, "step": 940},
    {"epoch": 0.304, "grad_norm": 0.4940149958405755, "learning_rate": 4.3950160601326865e-06, "loss": 0.20049993991851806, "step": 950},
    {"epoch": 0.3072, "grad_norm": 0.4891958940488766, "learning_rate": 4.376680763131811e-06, "loss": 0.20765538215637208, "step": 960},
    {"epoch": 0.3104, "grad_norm": 0.5373640149223949, "learning_rate": 4.358111229400296e-06, "loss": 0.2103745460510254, "step": 970},
    {"epoch": 0.3136, "grad_norm": 0.5035919946088194, "learning_rate": 4.33930977668283e-06, "loss": 0.2148181438446045, "step": 980},
    {"epoch": 0.3168, "grad_norm": 0.498832420199319, "learning_rate": 4.320278751670922e-06, "loss": 0.20667800903320313, "step": 990},
    {"epoch": 0.32, "grad_norm": 0.5016480811009209, "learning_rate": 4.301020529710009e-06, "loss": 0.20847175121307374, "step": 1000},
    {"epoch": 0.3232, "grad_norm": 0.5355131410598809, "learning_rate": 4.281537514502962e-06, "loss": 0.21192097663879395, "step": 1010},
    {"epoch": 0.3264, "grad_norm": 0.49710771531514497, "learning_rate": 4.261832137810093e-06, "loss": 0.20849306583404542, "step": 1020},
    {"epoch": 0.3296, "grad_norm": 0.4702938633516668, "learning_rate": 4.241906859145611e-06, "loss": 0.20947628021240233, "step": 1030},
    {"epoch": 0.3328, "grad_norm": 0.47328762785100176, "learning_rate": 4.221764165470661e-06, "loss": 0.20568199157714845, "step": 1040},
    {"epoch": 0.336, "grad_norm": 0.48090607151875236, "learning_rate": 4.201406570882898e-06, "loss": 0.20522446632385255, "step": 1050},
    {"epoch": 0.3392, "grad_norm": 0.46870182419574746, "learning_rate": 4.180836616302704e-06, "loss": 0.2044762134552002, "step": 1060},
    {"epoch": 0.3424, "grad_norm": 0.49284234006242156, "learning_rate": 4.160056869156041e-06, "loss": 0.20835609436035157, "step": 1070},
    {"epoch": 0.3456, "grad_norm": 0.425482663225026, "learning_rate": 4.139069923053995e-06, "loss": 0.20575876235961915, "step": 1080},
    {"epoch": 0.3488, "grad_norm": 0.46647669293000804, "learning_rate": 4.117878397469062e-06, "loss": 0.20992250442504884, "step": 1090},
    {"epoch": 0.352, "grad_norm": 0.4464343988416538, "learning_rate": 4.096484937408195e-06, "loss": 0.20092244148254396, "step": 1100},
    {"epoch": 0.3552, "grad_norm": 0.5116088744854695, "learning_rate": 4.074892213082676e-06, "loss": 0.20036702156066893, "step": 1110},
    {"epoch": 0.3584, "grad_norm": 4.940314739525779, "learning_rate": 4.0531029195748265e-06, "loss": 0.21338913440704346, "step": 1120},
    {"epoch": 0.3616, "grad_norm": 0.4721397920115156, "learning_rate": 4.03111977650163e-06, "loss": 0.20792775154113768, "step": 1130},
    {"epoch": 0.3648, "grad_norm": 0.5105519348301445, "learning_rate": 4.008945527675281e-06, "loss": 0.2061443328857422, "step": 1140},
    {"epoch": 0.368, "grad_norm": 0.523180958068929, "learning_rate": 3.986582940760717e-06, "loss": 0.1962942123413086, "step": 1150},
    {"epoch": 0.3712, "grad_norm": 0.5027335828799008, "learning_rate": 3.9640348069301785e-06, "loss": 0.2031947612762451, "step": 1160},
    {"epoch": 0.3744, "grad_norm": 0.48735270934050073, "learning_rate": 3.941303940514826e-06, "loss": 0.20448057651519774, "step": 1170},
    {"epoch": 0.3776, "grad_norm": 0.5075332440871839, "learning_rate": 3.918393178653472e-06, "loss": 0.20594587326049804, "step": 1180},
    {"epoch": 0.3808, "grad_norm": 0.4485083644552742, "learning_rate": 3.895305380938468e-06, "loss": 0.20264167785644532, "step": 1190},
    {"epoch": 0.384, "grad_norm": 0.4568492727427137, "learning_rate": 3.872043429058783e-06, "loss": 0.20010733604431152, "step": 1200},
    {"epoch": 0.3872, "grad_norm": 0.46103501808297814, "learning_rate": 3.84861022644033e-06, "loss": 0.2026883602142334, "step": 1210},
    {"epoch": 0.3904, "grad_norm": 0.46609834517793386, "learning_rate": 3.825008697883574e-06, "loss": 0.21079249382019044, "step": 1220},
    {"epoch": 0.3936, "grad_norm": 0.49992288047242467, "learning_rate": 3.8012417891984776e-06, "loss": 0.2031094551086426, "step": 1230},
    {"epoch": 0.3968, "grad_norm": 0.4746264528155682, "learning_rate": 3.777312466836819e-06, "loss": 0.20238199234008789, "step": 1240},
    {"epoch": 0.4, "grad_norm": 0.45243385346205817, "learning_rate": 3.7532237175219378e-06, "loss": 0.20085253715515136, "step": 1250},
    {"epoch": 0.4032, "grad_norm": 0.48931316379420287, "learning_rate": 3.728978547875948e-06, "loss": 0.20520598888397218, "step": 1260},
    {"epoch": 0.4064, "grad_norm": 0.5229456414008956, "learning_rate": 3.7045799840444712e-06, "loss": 0.19984333515167235, "step": 1270},
    {"epoch": 0.4096, "grad_norm": 0.4773055647919508, "learning_rate": 3.6800310713189258e-06, "loss": 0.20064287185668944, "step": 1280},
    {"epoch": 0.4128, "grad_norm": 0.4824962267097886, "learning_rate": 3.6553348737564328e-06, "loss": 0.20138092041015626, "step": 1290},
    {"epoch": 0.416, "grad_norm": 0.47245858486532044, "learning_rate": 3.6304944737973794e-06, "loss": 0.20704314708709717, "step": 1300},
    {"epoch": 0.4192, "grad_norm": 0.47670774891547607, "learning_rate": 3.6055129718806836e-06, "loss": 0.20015296936035157, "step": 1310},
    {"epoch": 0.4224, "grad_norm": 0.4553061754046557, "learning_rate": 3.5803934860568134e-06, "loss": 0.19692450761795044, "step": 1320},
    {"epoch": 0.4256, "grad_norm": 0.5124220374815842, "learning_rate": 3.5551391515986163e-06, "loss": 0.2016448497772217, "step": 1330},
    {"epoch": 0.4288, "grad_norm": 0.4809826187082155, "learning_rate": 3.529753120609982e-06, "loss": 0.19793987274169922, "step": 1340},
    {"epoch": 0.432, "grad_norm": 0.48798480914379067, "learning_rate": 3.5042385616324243e-06, "loss": 0.20041651725769044, "step": 1350},
    {"epoch": 0.4352, "grad_norm": 0.4589600174491072, "learning_rate": 3.4785986592495934e-06, "loss": 0.19874777793884277, "step": 1360},
    {"epoch": 0.4384, "grad_norm": 0.44810416886840765, "learning_rate": 3.452836613689803e-06, "loss": 0.19696075916290284, "step": 1370},
    {"epoch": 0.4416, "grad_norm": 0.4584133576368786, "learning_rate": 3.426955640426584e-06, "loss": 0.20014967918395996, "step": 1380},
    {"epoch": 0.4448, "grad_norm": 0.46474214573205574, "learning_rate": 3.4009589697773605e-06, "loss": 0.19937365055084227, "step": 1390},
    {"epoch": 0.448, "grad_norm": 0.4671452045462699, "learning_rate": 3.3748498465002475e-06, "loss": 0.19703936576843262, "step": 1400},
    {"epoch": 0.4512, "grad_norm": 0.48450994567172556, "learning_rate": 3.3486315293890693e-06, "loss": 0.20506525039672852, "step": 1410},
    {"epoch": 0.4544, "grad_norm": 0.48940983460177095, "learning_rate": 3.3223072908666053e-06, "loss": 0.19508613348007203, "step": 1420},
    {"epoch": 0.4576, "grad_norm": 0.5510507698314822, "learning_rate": 3.295880416576153e-06, "loss": 0.20555310249328612, "step": 1430},
    {"epoch": 0.4608, "grad_norm": 0.45473195837081576, "learning_rate": 3.269354204971427e-06, "loss": 0.19813575744628906, "step": 1440},
    {"epoch": 0.464, "grad_norm": 0.4854091562037593, "learning_rate": 3.242731966904865e-06, "loss": 0.19694712162017822, "step": 1450},
    {"epoch": 0.4672, "grad_norm": 0.4637441174996577, "learning_rate": 3.2160170252143913e-06, "loss": 0.1959088087081909, "step": 1460},
    {"epoch": 0.4704, "grad_norm": 0.4460606032902631, "learning_rate": 3.1892127143086716e-06, "loss": 0.20340628623962403, "step": 1470},
    {"epoch": 0.4736, "grad_norm": 0.4768689558424143, "learning_rate": 3.1623223797509347e-06, "loss": 0.19146734476089478, "step": 1480},
    {"epoch": 0.4768, "grad_norm": 0.46631038217283505, "learning_rate": 3.135349377841396e-06, "loss": 0.19588179588317872, "step": 1490},
    {"epoch": 0.48, "grad_norm": 0.48197350793708515, "learning_rate": 3.1082970751983497e-06, "loss": 0.20245718955993652, "step": 1500},
    {"epoch": 0.4832, "grad_norm": 0.44408940491911375, "learning_rate": 3.0811688483379546e-06, "loss": 0.19959219694137573, "step": 1510},
    {"epoch": 0.4864, "grad_norm": 0.47255519902507054, "learning_rate": 3.0539680832528074e-06, "loss": 0.1994904398918152, "step": 1520},
    {"epoch": 0.4896, "grad_norm": 0.48800627171777977, "learning_rate": 3.026698174989316e-06, "loss": 0.19807126522064208, "step": 1530},
    {"epoch": 0.4928, "grad_norm": 0.4748737132528679, "learning_rate": 2.999362527223952e-06, "loss": 0.19806113243103027, "step": 1540},
    {"epoch": 0.496, "grad_norm": 0.47637730688550123, "learning_rate": 2.9719645518384194e-06, "loss": 0.19955278635025026, "step": 1550},
    {"epoch": 0.4992, "grad_norm": 0.5411554495039922, "learning_rate": 2.944507668493807e-06, "loss": 0.202299165725708, "step": 1560},
    {"epoch": 0.5024, "grad_norm": 0.48642193804707995, "learning_rate": 2.9169953042037623e-06, "loss": 0.19863581657409668, "step": 1570},
    {"epoch": 0.5056, "grad_norm": 0.5363553346933208, "learning_rate": 2.889430892906754e-06, "loss": 0.19409118890762328, "step": 1580},
    {"epoch": 0.5088, "grad_norm": 0.47187050499878397, "learning_rate": 2.861817875037462e-06, "loss": 0.1912764310836792, "step": 1590},
    {"epoch": 0.512, "grad_norm": 0.5163595948637988, "learning_rate": 2.8341596970973683e-06, "loss": 0.20115599632263184, "step": 1600},
    {"epoch": 0.5152, "grad_norm": 0.5033907485073755, "learning_rate": 2.80645981122458e-06, "loss": 0.19687057733535768, "step": 1610},
    {"epoch": 0.5184, "grad_norm": 0.4753722793172304, "learning_rate": 2.7787216747629508e-06, "loss": 0.20292258262634277, "step": 1620},
    {"epoch": 0.5216, "grad_norm": 0.46781165760957, "learning_rate": 2.7509487498305615e-06, "loss": 0.18959319591522217, "step": 1630},
    {"epoch": 0.5248, "grad_norm": 0.4803554793777817, "learning_rate": 2.7231445028875924e-06, "loss": 0.19619333744049072, "step": 1640},
    {"epoch": 0.528, "grad_norm": 0.43719126287209875, "learning_rate": 2.6953124043036604e-06, "loss": 0.19511375427246094, "step": 1650},
    {"epoch": 0.5312, "grad_norm": 0.4689037514921924, "learning_rate": 2.667455927924667e-06, "loss": 0.19399585723876953, "step": 1660},
    {"epoch": 0.5344, "grad_norm": 0.48479905355532704, "learning_rate": 2.6395785506392164e-06, "loss": 0.1896076202392578, "step": 1670},
    {"epoch": 0.5376, "grad_norm": 0.516453973005613, "learning_rate": 2.6116837519446407e-06, "loss": 0.1939442992210388, "step": 1680},
    {"epoch": 0.5408, "grad_norm": 0.47710575683228795, "learning_rate": 2.5837750135127192e-06, "loss": 0.19078316688537597, "step": 1690},
    {"epoch": 0.544, "grad_norm": 0.47654319681013313, "learning_rate": 2.555855818755108e-06, "loss": 0.19690483808517456, "step": 1700},
    {"epoch": 0.5472, "grad_norm": 0.5030326386548561, "learning_rate": 2.5279296523885636e-06, "loss": 0.19325432777404786, "step": 1710},
    {"epoch": 0.5504, "grad_norm": 0.49452423153374125, "learning_rate": 2.5e-06, "loss": 0.19436432123184205, "step": 1720},
    {"epoch": 0.5536, "grad_norm": 0.5135088244704792, "learning_rate": 2.472070347611437e-06, "loss": 0.1878933072090149, "step": 1730},
    {"epoch": 0.5568, "grad_norm": 0.5160118206798595, "learning_rate": 2.444144181244893e-06, "loss": 0.19355961084365844, "step": 1740},
    {"epoch": 0.56, "grad_norm": 0.5069308846787346, "learning_rate": 2.416224986487282e-06, "loss": 0.19122695922851562, "step": 1750},
    {"epoch": 0.5632, "grad_norm": 0.5385800538703149, "learning_rate": 2.3883162480553605e-06, "loss": 0.18820159435272216, "step": 1760},
    {"epoch": 0.5664, "grad_norm": 0.49129457413116234, "learning_rate": 2.3604214493607844e-06, "loss": 0.19197521209716797, "step": 1770},
    {"epoch": 0.5696, "grad_norm": 0.4908165776123557, "learning_rate": 2.332544072075333e-06, "loss": 0.19534649848937988, "step": 1780},
    {"epoch": 0.5728, "grad_norm": 0.49497656453552125, "learning_rate": 2.30468759569634e-06, "loss": 0.19484236240386962, "step": 1790},
    {"epoch": 0.576, "grad_norm": 0.466973816624908, "learning_rate": 2.276855497112408e-06, "loss": 0.191474986076355, "step": 1800},
    {"epoch": 0.5792, "grad_norm": 0.498294237386886, "learning_rate": 2.2490512501694394e-06, "loss": 0.18636202812194824, "step": 1810},
    {"epoch": 0.5824, "grad_norm": 0.5110432771457695, "learning_rate": 2.2212783252370496e-06, "loss": 0.19112749099731446, "step": 1820},
    {"epoch": 0.5856, "grad_norm": 0.4923044532988948, "learning_rate": 2.1935401887754213e-06, "loss": 0.19590845108032226, "step": 1830},
    {"epoch": 0.5888, "grad_norm": 0.49881036242858373, "learning_rate": 2.165840302902632e-06, "loss": 0.18917866945266723, "step": 1840},
    {"epoch": 0.592, "grad_norm": 0.5070848566140863, "learning_rate": 2.1381821249625383e-06, "loss": 0.1955878973007202, "step": 1850},
    {"epoch": 0.5952, "grad_norm": 0.5245919327161893, "learning_rate": 2.1105691070932465e-06, "loss": 0.18681724071502687, "step": 1860},
    {"epoch": 0.5984, "grad_norm": 0.5043139368489675, "learning_rate": 2.083004695796238e-06, "loss": 0.185194993019104, "step": 1870},
    {"epoch": 0.6016, "grad_norm": 0.5180452275250914, "learning_rate": 2.055492331506194e-06, "loss": 0.1928567886352539, "step": 1880},
    {"epoch": 0.6048, "grad_norm": 0.5320215436686966, "learning_rate": 2.0280354481615814e-06, "loss": 0.19074957370758056, "step": 1890},
    {"epoch": 0.608, "grad_norm": 0.4725862343819939, "learning_rate": 2.000637472776049e-06, "loss": 0.19257795810699463, "step": 1900},
    {"epoch": 0.6112, "grad_norm": 0.46908638481055026, "learning_rate": 1.973301825010685e-06, "loss": 0.18594731092453004, "step": 1910},
    {"epoch": 0.6144, "grad_norm": 0.5595713557618127, "learning_rate": 1.9460319167471934e-06, "loss": 0.19121139049530028, "step": 1920},
    {"epoch": 0.6176, "grad_norm": 0.507704360185881, "learning_rate": 1.9188311516620466e-06, "loss": 0.18624544143676758, "step": 1930},
    {"epoch": 0.6208, "grad_norm": 0.4860192603301521, "learning_rate": 1.891702924801651e-06, "loss": 0.19231630563735963, "step": 1940},
    {"epoch": 0.624, "grad_norm": 0.5275367662218493, "learning_rate": 1.864650622158604e-06, "loss": 0.19608126878738402, "step": 1950},
    {"epoch": 0.6272, "grad_norm": 0.49282562967431837, "learning_rate": 1.8376776202490666e-06, "loss": 0.19235665798187257, "step": 1960},
    {"epoch": 0.6304, "grad_norm": 0.5182260002744055, "learning_rate": 1.8107872856913293e-06, "loss": 0.18613014221191407, "step": 1970},
    {"epoch": 0.6336, "grad_norm": 0.5103313601861706, "learning_rate": 1.7839829747856096e-06, "loss": 0.1881113052368164, "step": 1980},
    {"epoch": 0.6368, "grad_norm": 0.5451499180289584, "learning_rate": 1.7572680330951359e-06, "loss": 0.18735458850860595, "step": 1990},
    {"epoch": 0.64, "grad_norm": 0.5090636315844644, "learning_rate": 1.7306457950285747e-06, "loss": 0.1885282278060913, "step": 2000},
    {"epoch": 0.6432, "grad_norm": 0.4758742975901025, "learning_rate": 1.704119583423848e-06, "loss": 0.18241598606109619, "step": 2010},
    {"epoch": 0.6464, "grad_norm": 0.49602490022248863, "learning_rate": 1.677692709133396e-06, "loss": 0.19074147939682007, "step": 2020},
    {"epoch": 0.6496, "grad_norm": 0.520455285125112, "learning_rate": 1.6513684706109311e-06, "loss": 0.19024887084960937, "step": 2030},
    {"epoch": 0.6528, "grad_norm": 0.5234524283247538, "learning_rate": 1.6251501534997529e-06, "loss": 0.18900917768478392, "step": 2040},
    {"epoch": 0.656, "grad_norm": 0.4762667999370438, "learning_rate": 1.5990410302226405e-06, "loss": 0.18147594928741456, "step": 2050},
    {"epoch": 0.6592, "grad_norm": 0.4931916769975977, "learning_rate": 1.5730443595734162e-06, "loss": 0.18815698623657226, "step": 2060},
    {"epoch": 0.6624, "grad_norm": 0.5595459804684163, "learning_rate": 1.5471633863101982e-06, "loss": 0.18958520889282227, "step": 2070},
    {"epoch": 0.6656, "grad_norm": 0.551381176131532, "learning_rate": 1.521401340750407e-06, "loss": 0.1908926248550415, "step": 2080},
    {"epoch": 0.6688, "grad_norm": 0.5155022860725758, "learning_rate": 1.495761438367577e-06, "loss": 0.18872777223587037, "step": 2090},
    {"epoch": 0.672, "grad_norm": 0.6037433446756716, "learning_rate": 1.4702468793900187e-06, "loss": 0.18800405263900757, "step": 2100},
    {"epoch": 0.6752, "grad_norm": 0.5613773833705744, "learning_rate": 1.444860848401384e-06, "loss": 0.18743778467178346, "step": 2110},
    {"epoch": 0.6784, "grad_norm": 0.5277286435676816, "learning_rate": 1.4196065139431866e-06, "loss": 0.18769149780273436, "step": 2120},
    {"epoch": 0.6816, "grad_norm": 0.5487755330646784, "learning_rate": 1.3944870281193178e-06, "loss": 0.1866753101348877, "step": 2130},
    {"epoch": 0.6848, "grad_norm": 0.5319334450957595, "learning_rate": 1.3695055262026208e-06, "loss": 0.19193503856658936, "step": 2140},
    {"epoch": 0.688, "grad_norm": 0.5061777243502238, "learning_rate": 1.3446651262435679e-06, "loss": 0.18499069213867186, "step": 2150},
    {"epoch": 0.6912, "grad_norm": 0.5063080834031065, "learning_rate": 1.3199689286810746e-06, "loss": 0.18700281381607056, "step": 2160},
    {"epoch": 0.6944, "grad_norm": 0.5014045449596041, "learning_rate": 1.2954200159555294e-06, "loss": 0.18185386657714844, "step": 2170},
    {"epoch": 0.6976, "grad_norm": 0.5417896517828541, "learning_rate": 1.2710214521240527e-06, "loss": 0.18632771968841552, "step": 2180},
    {"epoch": 0.7008, "grad_norm": 0.5710908799443121, "learning_rate": 1.246776282478063e-06, "loss": 0.18732945919036864, "step": 2190},
    {"epoch": 0.704, "grad_norm": 0.5180508096448415, "learning_rate": 1.222687533163181e-06, "loss": 0.18602204322814941, "step": 2200},
    {"epoch": 0.7072, "grad_norm": 0.5480758918229119, "learning_rate": 1.1987582108015228e-06, "loss": 0.18710973262786865, "step": 2210},
    {"epoch": 0.7104, "grad_norm": 0.5631818126474104, "learning_rate": 1.1749913021164255e-06, "loss": 0.18828771114349366, "step": 2220},
    {"epoch": 0.7136, "grad_norm": 0.4833634541431531, "learning_rate": 1.1513897735596702e-06, "loss": 0.18257718086242675, "step": 2230},
    {"epoch": 0.7168, "grad_norm": 0.5051522117897481, "learning_rate": 1.127956570941218e-06, "loss": 0.17966469526290893, "step": 2240},
    {"epoch": 0.72, "grad_norm": 0.5404271805851407, "learning_rate": 1.104694619061533e-06, "loss": 0.18814800977706908, "step": 2250},
    {"epoch": 0.7232, "grad_norm": 0.5147342090287059, "learning_rate": 1.0816068213465295e-06, "loss": 0.1908186197280884, "step": 2260},
    {"epoch": 0.7264, "grad_norm": 0.5558495401174878, "learning_rate": 1.0586960594851762e-06, "loss": 0.1859324097633362, "step": 2270},
    {"epoch": 0.7296, "grad_norm": 0.6185737554957568, "learning_rate": 1.0359651930698217e-06, "loss": 0.18477405309677125, "step": 2280},
    {"epoch": 0.7328, "grad_norm": 0.5398647348951853, "learning_rate": 1.0134170592392837e-06, "loss": 0.1857767939567566, "step": 2290},
    {"epoch": 0.736, "grad_norm": 0.5450678028060058, "learning_rate": 9.910544723247204e-07, "loss": 0.184822678565979, "step": 2300},
    {"epoch": 0.7392, "grad_norm": 0.5999082382312588, "learning_rate": 9.688802234983706e-07, "loss": 0.18381783962249756, "step": 2310},
    {"epoch": 0.7424, "grad_norm": 0.5175099712487172, "learning_rate": 9.468970804251742e-07, "loss": 0.18641353845596315, "step": 2320},
    {"epoch": 0.7456, "grad_norm": 0.5367638040398911, "learning_rate": 9.251077869173244e-07, "loss": 0.18090612888336183, "step": 2330},
    {"epoch": 0.7488, "grad_norm": 0.563594153188617, "learning_rate": 9.035150625918054e-07, "loss": 0.18149322271347046, "step": 2340},
    {"epoch": 0.752, "grad_norm": 0.5304713442318342, "learning_rate": 8.821216025309395e-07, "loss": 0.18464915752410888, "step": 2350},
    {"epoch": 0.7552, "grad_norm": 0.535119183480021, "learning_rate": 8.609300769460055e-07, "loss": 0.1792607307434082, "step": 2360},
    {"epoch": 0.7584, "grad_norm": 0.5724539486438234, "learning_rate": 8.399431308439592e-07, "loss": 0.183684778213501, "step": 2370},
    {"epoch": 0.7616, "grad_norm": 0.5589161632397335, "learning_rate": 8.191633836972962e-07, "loss": 0.18650429248809813, "step": 2380},
    {"epoch": 0.7648, "grad_norm": 0.5386156132762686, "learning_rate": 7.985934291171024e-07, "loss": 0.1821720838546753, "step": 2390},
    {"epoch": 0.768, "grad_norm": 0.5321288466713382, "learning_rate": 7.7823583452934e-07, "loss": 0.18489625453948974, "step": 2400},
    {"epoch": 0.7712, "grad_norm": 0.5670301824645666, "learning_rate": 7.58093140854389e-07, "loss": 0.18495336771011353, "step": 2410},
    {"epoch": 0.7744, "grad_norm": 0.6058756306995335, "learning_rate": 7.381678621899077e-07, "loss": 0.1848145008087158, "step": 2420},
    {"epoch": 0.7776, "grad_norm": 0.5477002870283818, "learning_rate": 7.184624854970379e-07, "loss": 0.1817490816116333, "step": 2430},
    {"epoch": 0.7808, "grad_norm": 0.5458027173632266, "learning_rate": 6.989794702899932e-07, "loss": 0.18078404664993286, "step": 2440},
    {"epoch": 0.784, "grad_norm": 0.5772130708628379, "learning_rate": 6.797212483290777e-07, "loss": 0.18299766778945922, "step": 2450},
    {"epoch": 0.7872, "grad_norm": 0.5674146932938366, "learning_rate": 6.60690223317171e-07, "loss": 0.1799448013305664, "step": 2460},
    {"epoch": 0.7904, "grad_norm": 0.5238538237059384, "learning_rate": 6.418887705997046e-07, "loss": 0.1826066255569458, "step": 2470},
    {"epoch": 0.7936, "grad_norm": 0.5857270779434125, "learning_rate": 6.23319236868189e-07, "loss": 0.18549437522888185, "step": 2480},
    {"epoch": 0.7968, "grad_norm": 0.5274424793724192, "learning_rate": 6.049839398673141e-07, "loss": 0.1865037798881531, "step": 2490},
    {"epoch": 0.8, "grad_norm": 0.5820741885019232, "learning_rate": 5.868851681056567e-07, "loss": 0.18739759922027588, "step": 2500},
    {"epoch": 0.8032, "grad_norm": 0.559971376703767, "learning_rate": 5.690251805700467e-07, "loss": 0.1853170394897461, "step": 2510},
    {"epoch": 0.8064, "grad_norm": 0.5456407872897143, "learning_rate": 5.514062064436096e-07, "loss": 0.18589026927948, "step": 2520},
    {"epoch": 0.8096, "grad_norm": 0.5866178273652722, "learning_rate": 5.34030444827533e-07, "loss": 0.1827709197998047, "step": 2530},
    {"epoch": 0.8128, "grad_norm": 0.588749656654477, "learning_rate": 5.169000644665895e-07, "loss": 0.1794450044631958, "step": 2540},
    {"epoch": 0.816, "grad_norm": 0.5778176841150756, "learning_rate": 5.000172034784442e-07, "loss": 0.18060548305511476,
| 1795 |
+
"step": 2550
|
| 1796 |
+
},
|
| 1797 |
+
{
|
| 1798 |
+
"epoch": 0.8192,
|
| 1799 |
+
"grad_norm": 0.566426267196354,
|
| 1800 |
+
"learning_rate": 4.833839690867853e-07,
|
| 1801 |
+
"loss": 0.18326361179351808,
|
| 1802 |
+
"step": 2560
|
| 1803 |
+
},
|
| 1804 |
+
{
|
| 1805 |
+
"epoch": 0.8224,
|
| 1806 |
+
"grad_norm": 0.5763812670051818,
|
| 1807 |
+
"learning_rate": 4.6700243735831705e-07,
|
| 1808 |
+
"loss": 0.17798151969909667,
|
| 1809 |
+
"step": 2570
|
| 1810 |
+
},
|
| 1811 |
+
{
|
| 1812 |
+
"epoch": 0.8256,
|
| 1813 |
+
"grad_norm": 0.5465254160649792,
|
| 1814 |
+
"learning_rate": 4.508746529436311e-07,
|
| 1815 |
+
"loss": 0.1761394739151001,
|
| 1816 |
+
"step": 2580
|
| 1817 |
+
},
|
| 1818 |
+
{
|
| 1819 |
+
"epoch": 0.8288,
|
| 1820 |
+
"grad_norm": 0.5717164779412172,
|
| 1821 |
+
"learning_rate": 4.350026288220083e-07,
|
| 1822 |
+
"loss": 0.18241602182388306,
|
| 1823 |
+
"step": 2590
|
| 1824 |
+
},
|
| 1825 |
+
{
|
| 1826 |
+
"epoch": 0.832,
|
| 1827 |
+
"grad_norm": 0.5532919690194787,
|
| 1828 |
+
"learning_rate": 4.1938834605017133e-07,
|
| 1829 |
+
"loss": 0.1799800157546997,
|
| 1830 |
+
"step": 2600
|
| 1831 |
+
},
|
| 1832 |
+
{
|
| 1833 |
+
"epoch": 0.8352,
|
| 1834 |
+
"grad_norm": 0.5485503614596886,
|
| 1835 |
+
"learning_rate": 4.0403375351501515e-07,
|
| 1836 |
+
"loss": 0.18037915229797363,
|
| 1837 |
+
"step": 2610
|
| 1838 |
+
},
|
| 1839 |
+
{
|
| 1840 |
+
"epoch": 0.8384,
|
| 1841 |
+
"grad_norm": 0.5921392059955939,
|
| 1842 |
+
"learning_rate": 3.88940767690362e-07,
|
| 1843 |
+
"loss": 0.17850807905197144,
|
| 1844 |
+
"step": 2620
|
| 1845 |
+
},
|
| 1846 |
+
{
|
| 1847 |
+
"epoch": 0.8416,
|
| 1848 |
+
"grad_norm": 0.6173777417506611,
|
| 1849 |
+
"learning_rate": 3.7411127239775774e-07,
|
| 1850 |
+
"loss": 0.17773046493530273,
|
| 1851 |
+
"step": 2630
|
| 1852 |
+
},
|
| 1853 |
+
{
|
| 1854 |
+
"epoch": 0.8448,
|
| 1855 |
+
"grad_norm": 0.5704461135916385,
|
| 1856 |
+
"learning_rate": 3.595471185713431e-07,
|
| 1857 |
+
"loss": 0.17534157037734985,
|
| 1858 |
+
"step": 2640
|
| 1859 |
+
},
|
| 1860 |
+
{
|
| 1861 |
+
"epoch": 0.848,
|
| 1862 |
+
"grad_norm": 0.6016600022490033,
|
| 1863 |
+
"learning_rate": 3.4525012402682826e-07,
|
| 1864 |
+
"loss": 0.17784465551376344,
|
| 1865 |
+
"step": 2650
|
| 1866 |
+
},
|
| 1867 |
+
{
|
| 1868 |
+
"epoch": 0.8512,
|
| 1869 |
+
"grad_norm": 0.5793357844007763,
|
| 1870 |
+
"learning_rate": 3.3122207323460804e-07,
|
| 1871 |
+
"loss": 0.17941689491271973,
|
| 1872 |
+
"step": 2660
|
| 1873 |
+
},
|
| 1874 |
+
{
|
| 1875 |
+
"epoch": 0.8544,
|
| 1876 |
+
"grad_norm": 0.5402101980665998,
|
| 1877 |
+
"learning_rate": 3.1746471709702963e-07,
|
| 1878 |
+
"loss": 0.17694177627563476,
|
| 1879 |
+
"step": 2670
|
| 1880 |
+
},
|
| 1881 |
+
{
|
| 1882 |
+
"epoch": 0.8576,
|
| 1883 |
+
"grad_norm": 0.5764717205309013,
|
| 1884 |
+
"learning_rate": 3.039797727298585e-07,
|
| 1885 |
+
"loss": 0.18307201862335204,
|
| 1886 |
+
"step": 2680
|
| 1887 |
+
},
|
| 1888 |
+
{
|
| 1889 |
+
"epoch": 0.8608,
|
| 1890 |
+
"grad_norm": 0.6021889152147203,
|
| 1891 |
+
"learning_rate": 2.9076892324795546e-07,
|
| 1892 |
+
"loss": 0.18175405263900757,
|
| 1893 |
+
"step": 2690
|
| 1894 |
+
},
|
| 1895 |
+
{
|
| 1896 |
+
"epoch": 0.864,
|
| 1897 |
+
"grad_norm": 0.5783244972157141,
|
| 1898 |
+
"learning_rate": 2.778338175551995e-07,
|
| 1899 |
+
"loss": 0.17646790742874147,
|
| 1900 |
+
"step": 2700
|
| 1901 |
+
},
|
| 1902 |
+
{
|
| 1903 |
+
"epoch": 0.8672,
|
| 1904 |
+
"grad_norm": 0.573282650162234,
|
| 1905 |
+
"learning_rate": 2.6517607013868326e-07,
|
| 1906 |
+
"loss": 0.18459818363189698,
|
| 1907 |
+
"step": 2710
|
| 1908 |
+
},
|
| 1909 |
+
{
|
| 1910 |
+
"epoch": 0.8704,
|
| 1911 |
+
"grad_norm": 0.6039696058732922,
|
| 1912 |
+
"learning_rate": 2.527972608672002e-07,
|
| 1913 |
+
"loss": 0.18084490299224854,
|
| 1914 |
+
"step": 2720
|
| 1915 |
+
},
|
| 1916 |
+
{
|
| 1917 |
+
"epoch": 0.8736,
|
| 1918 |
+
"grad_norm": 0.5916439702722857,
|
| 1919 |
+
"learning_rate": 2.40698934794053e-07,
|
| 1920 |
+
"loss": 0.18053301572799682,
|
| 1921 |
+
"step": 2730
|
| 1922 |
+
},
|
| 1923 |
+
{
|
| 1924 |
+
"epoch": 0.8768,
|
| 1925 |
+
"grad_norm": 0.5703451942226244,
|
| 1926 |
+
"learning_rate": 2.2888260196421237e-07,
|
| 1927 |
+
"loss": 0.1792958378791809,
|
| 1928 |
+
"step": 2740
|
| 1929 |
+
},
|
| 1930 |
+
{
|
| 1931 |
+
"epoch": 0.88,
|
| 1932 |
+
"grad_norm": 0.5672304805383847,
|
| 1933 |
+
"learning_rate": 2.1734973722583735e-07,
|
| 1934 |
+
"loss": 0.1819172501564026,
|
| 1935 |
+
"step": 2750
|
| 1936 |
+
},
|
| 1937 |
+
{
|
| 1938 |
+
"epoch": 0.8832,
|
| 1939 |
+
"grad_norm": 0.5784570642525821,
|
| 1940 |
+
"learning_rate": 2.0610178004619564e-07,
|
| 1941 |
+
"loss": 0.17332799434661866,
|
| 1942 |
+
"step": 2760
|
| 1943 |
+
},
|
| 1944 |
+
{
|
| 1945 |
+
"epoch": 0.8864,
|
| 1946 |
+
"grad_norm": 0.575451427907292,
|
| 1947 |
+
"learning_rate": 1.9514013433199834e-07,
|
| 1948 |
+
"loss": 0.18558990955352783,
|
| 1949 |
+
"step": 2770
|
| 1950 |
+
},
|
| 1951 |
+
{
|
| 1952 |
+
"epoch": 0.8896,
|
| 1953 |
+
"grad_norm": 0.5133461724908028,
|
| 1954 |
+
"learning_rate": 1.8446616825416958e-07,
|
| 1955 |
+
"loss": 0.18399085998535156,
|
| 1956 |
+
"step": 2780
|
| 1957 |
+
},
|
| 1958 |
+
{
|
| 1959 |
+
"epoch": 0.8928,
|
| 1960 |
+
"grad_norm": 0.6123280023323261,
|
| 1961 |
+
"learning_rate": 1.7408121407708007e-07,
|
| 1962 |
+
"loss": 0.1844745397567749,
|
| 1963 |
+
"step": 2790
|
| 1964 |
+
},
|
| 1965 |
+
{
|
| 1966 |
+
"epoch": 0.896,
|
| 1967 |
+
"grad_norm": 0.5761361465385083,
|
| 1968 |
+
"learning_rate": 1.6398656799226253e-07,
|
| 1969 |
+
"loss": 0.17304511070251466,
|
| 1970 |
+
"step": 2800
|
| 1971 |
+
},
|
| 1972 |
+
{
|
| 1973 |
+
"epoch": 0.8992,
|
| 1974 |
+
"grad_norm": 0.6034414454227958,
|
| 1975 |
+
"learning_rate": 1.5418348995662773e-07,
|
| 1976 |
+
"loss": 0.17871806621551514,
|
| 1977 |
+
"step": 2810
|
| 1978 |
+
},
|
| 1979 |
+
{
|
| 1980 |
+
"epoch": 0.9024,
|
| 1981 |
+
"grad_norm": 0.5923974971972374,
|
| 1982 |
+
"learning_rate": 1.4467320353520275e-07,
|
| 1983 |
+
"loss": 0.17667040824890137,
|
| 1984 |
+
"step": 2820
|
| 1985 |
+
},
|
| 1986 |
+
{
|
| 1987 |
+
"epoch": 0.9056,
|
| 1988 |
+
"grad_norm": 0.603734748014922,
|
| 1989 |
+
"learning_rate": 1.3545689574841341e-07,
|
| 1990 |
+
"loss": 0.1787508487701416,
|
| 1991 |
+
"step": 2830
|
| 1992 |
+
},
|
| 1993 |
+
{
|
| 1994 |
+
"epoch": 0.9088,
|
| 1995 |
+
"grad_norm": 0.5750783540393263,
|
| 1996 |
+
"learning_rate": 1.26535716923927e-07,
|
| 1997 |
+
"loss": 0.18438329696655273,
|
| 1998 |
+
"step": 2840
|
| 1999 |
+
},
|
| 2000 |
+
{
|
| 2001 |
+
"epoch": 0.912,
|
| 2002 |
+
"grad_norm": 0.5716942434142535,
|
| 2003 |
+
"learning_rate": 1.1791078055307493e-07,
|
| 2004 |
+
"loss": 0.1802410364151001,
|
| 2005 |
+
"step": 2850
|
| 2006 |
+
},
|
| 2007 |
+
{
|
| 2008 |
+
"epoch": 0.9152,
|
| 2009 |
+
"grad_norm": 0.6031535401501658,
|
| 2010 |
+
"learning_rate": 1.0958316315187289e-07,
|
| 2011 |
+
"loss": 0.17950894832611083,
|
| 2012 |
+
"step": 2860
|
| 2013 |
+
},
|
| 2014 |
+
{
|
| 2015 |
+
"epoch": 0.9184,
|
| 2016 |
+
"grad_norm": 0.5724651470732645,
|
| 2017 |
+
"learning_rate": 1.0155390412665528e-07,
|
| 2018 |
+
"loss": 0.17800890207290648,
|
| 2019 |
+
"step": 2870
|
| 2020 |
+
},
|
| 2021 |
+
{
|
| 2022 |
+
"epoch": 0.9216,
|
| 2023 |
+
"grad_norm": 0.5920847136083833,
|
| 2024 |
+
"learning_rate": 9.38240056443443e-08,
|
| 2025 |
+
"loss": 0.17559461593627929,
|
| 2026 |
+
"step": 2880
|
| 2027 |
+
},
|
| 2028 |
+
{
|
| 2029 |
+
"epoch": 0.9248,
|
| 2030 |
+
"grad_norm": 0.5600845233888927,
|
| 2031 |
+
"learning_rate": 8.639443250736402e-08,
|
| 2032 |
+
"loss": 0.17780338525772094,
|
| 2033 |
+
"step": 2890
|
| 2034 |
+
},
|
| 2035 |
+
{
|
| 2036 |
+
"epoch": 0.928,
|
| 2037 |
+
"grad_norm": 0.5760602589693042,
|
| 2038 |
+
"learning_rate": 7.926611203321777e-08,
|
| 2039 |
+
"loss": 0.1794909954071045,
|
| 2040 |
+
"step": 2900
|
| 2041 |
+
},
|
| 2042 |
+
{
|
| 2043 |
+
"epoch": 0.9312,
|
| 2044 |
+
"grad_norm": 0.59057677772977,
|
| 2045 |
+
"learning_rate": 7.243993393874882e-08,
|
| 2046 |
+
"loss": 0.1795297384262085,
|
| 2047 |
+
"step": 2910
|
| 2048 |
+
},
|
| 2049 |
+
{
|
| 2050 |
+
"epoch": 0.9344,
|
| 2051 |
+
"grad_norm": 0.5693422129621047,
|
| 2052 |
+
"learning_rate": 6.591675022908805e-08,
|
| 2053 |
+
"loss": 0.17676992416381837,
|
| 2054 |
+
"step": 2920
|
| 2055 |
+
},
|
| 2056 |
+
{
|
| 2057 |
+
"epoch": 0.9376,
|
| 2058 |
+
"grad_norm": 0.5656532345210596,
|
| 2059 |
+
"learning_rate": 5.969737509131241e-08,
|
| 2060 |
+
"loss": 0.17433459758758546,
|
| 2061 |
+
"step": 2930
|
| 2062 |
+
},
|
| 2063 |
+
{
|
| 2064 |
+
"epoch": 0.9408,
|
| 2065 |
+
"grad_norm": 0.5865348817236666,
|
| 2066 |
+
"learning_rate": 5.3782584792823334e-08,
|
| 2067 |
+
"loss": 0.1795581579208374,
|
| 2068 |
+
"step": 2940
|
| 2069 |
+
},
|
| 2070 |
+
{
|
| 2071 |
+
"epoch": 0.944,
|
| 2072 |
+
"grad_norm": 0.6034375830769324,
|
| 2073 |
+
"learning_rate": 4.817311758445686e-08,
|
| 2074 |
+
"loss": 0.18066773414611817,
|
| 2075 |
+
"step": 2950
|
| 2076 |
+
},
|
| 2077 |
+
{
|
| 2078 |
+
"epoch": 0.9472,
|
| 2079 |
+
"grad_norm": 0.598761782830776,
|
| 2080 |
+
"learning_rate": 4.286967360833866e-08,
|
| 2081 |
+
"loss": 0.1803189516067505,
|
| 2082 |
+
"step": 2960
|
| 2083 |
+
},
|
| 2084 |
+
{
|
| 2085 |
+
"epoch": 0.9504,
|
| 2086 |
+
"grad_norm": 0.5410244646488507,
|
| 2087 |
+
"learning_rate": 3.787291481049754e-08,
|
| 2088 |
+
"loss": 0.18075671195983886,
|
| 2089 |
+
"step": 2970
|
| 2090 |
+
},
|
| 2091 |
+
{
|
| 2092 |
+
"epoch": 0.9536,
|
| 2093 |
+
"grad_norm": 0.6102805369465131,
|
| 2094 |
+
"learning_rate": 3.3183464858244364e-08,
|
| 2095 |
+
"loss": 0.18705531358718872,
|
| 2096 |
+
"step": 2980
|
| 2097 |
+
},
|
| 2098 |
+
{
|
| 2099 |
+
"epoch": 0.9568,
|
| 2100 |
+
"grad_norm": 0.5798299084498433,
|
| 2101 |
+
"learning_rate": 2.8801909062328992e-08,
|
| 2102 |
+
"loss": 0.17331962585449218,
|
| 2103 |
+
"step": 2990
|
| 2104 |
+
},
|
| 2105 |
+
{
|
| 2106 |
+
"epoch": 0.96,
|
| 2107 |
+
"grad_norm": 0.5999449762716584,
|
| 2108 |
+
"learning_rate": 2.4728794303886248e-08,
|
| 2109 |
+
"loss": 0.17158935070037842,
|
| 2110 |
+
"step": 3000
|
| 2111 |
+
},
|
| 2112 |
+
{
|
| 2113 |
+
"epoch": 0.9632,
|
| 2114 |
+
"grad_norm": 0.6212882795186798,
|
| 2115 |
+
"learning_rate": 2.0964628966175794e-08,
|
| 2116 |
+
"loss": 0.17738908529281616,
|
| 2117 |
+
"step": 3010
|
| 2118 |
+
},
|
| 2119 |
+
{
|
| 2120 |
+
"epoch": 0.9664,
|
| 2121 |
+
"grad_norm": 0.564746561855876,
|
| 2122 |
+
"learning_rate": 1.750988287113009e-08,
|
| 2123 |
+
"loss": 0.17667733430862426,
|
| 2124 |
+
"step": 3020
|
| 2125 |
+
},
|
| 2126 |
+
{
|
| 2127 |
+
"epoch": 0.9696,
|
| 2128 |
+
"grad_norm": 0.5852806549215316,
|
| 2129 |
+
"learning_rate": 1.4364987220713278e-08,
|
| 2130 |
+
"loss": 0.18457986116409303,
|
| 2131 |
+
"step": 3030
|
| 2132 |
+
},
|
| 2133 |
+
{
|
| 2134 |
+
"epoch": 0.9728,
|
| 2135 |
+
"grad_norm": 0.5991233203919278,
|
| 2136 |
+
"learning_rate": 1.1530334543099763e-08,
|
| 2137 |
+
"loss": 0.18215363025665282,
|
| 2138 |
+
"step": 3040
|
| 2139 |
+
},
|
| 2140 |
+
{
|
| 2141 |
+
"epoch": 0.976,
|
| 2142 |
+
"grad_norm": 0.6041102228390866,
|
| 2143 |
+
"learning_rate": 9.006278643683697e-09,
|
| 2144 |
+
"loss": 0.18243587017059326,
|
| 2145 |
+
"step": 3050
|
| 2146 |
+
},
|
| 2147 |
+
{
|
| 2148 |
+
"epoch": 0.9792,
|
| 2149 |
+
"grad_norm": 0.5869697890802611,
|
| 2150 |
+
"learning_rate": 6.793134560916514e-09,
|
| 2151 |
+
"loss": 0.18486570119857787,
|
| 2152 |
+
"step": 3060
|
| 2153 |
+
},
|
| 2154 |
+
{
|
| 2155 |
+
"epoch": 0.9824,
|
| 2156 |
+
"grad_norm": 0.5595978682216465,
|
| 2157 |
+
"learning_rate": 4.891178526986451e-09,
|
| 2158 |
+
"loss": 0.18047856092453002,
|
| 2159 |
+
"step": 3070
|
| 2160 |
+
},
|
| 2161 |
+
{
|
| 2162 |
+
"epoch": 0.9856,
|
| 2163 |
+
"grad_norm": 0.5638404572903396,
|
| 2164 |
+
"learning_rate": 3.3006479333413943e-09,
|
| 2165 |
+
"loss": 0.18349089622497558,
|
| 2166 |
+
"step": 3080
|
| 2167 |
+
},
|
| 2168 |
+
{
|
| 2169 |
+
"epoch": 0.9888,
|
| 2170 |
+
"grad_norm": 0.5582534730189623,
|
| 2171 |
+
"learning_rate": 2.021741301058422e-09,
|
| 2172 |
+
"loss": 0.18032891750335694,
|
| 2173 |
+
"step": 3090
|
| 2174 |
+
},
|
| 2175 |
+
{
|
| 2176 |
+
"epoch": 0.992,
|
| 2177 |
+
"grad_norm": 0.5757824692806152,
|
| 2178 |
+
"learning_rate": 1.0546182560652872e-09,
|
| 2179 |
+
"loss": 0.1812995433807373,
|
| 2180 |
+
"step": 3100
|
| 2181 |
+
},
|
| 2182 |
+
{
|
| 2183 |
+
"epoch": 0.9952,
|
| 2184 |
+
"grad_norm": 0.5718406851297113,
|
| 2185 |
+
"learning_rate": 3.9939950921774607e-10,
|
| 2186 |
+
"loss": 0.17747504711151124,
|
| 2187 |
+
"step": 3110
|
| 2188 |
+
},
|
| 2189 |
+
{
|
| 2190 |
+
"epoch": 0.9984,
|
| 2191 |
+
"grad_norm": 0.549457935685087,
|
| 2192 |
+
"learning_rate": 5.616684123160854e-11,
|
| 2193 |
+
"loss": 0.17633507251739503,
|
| 2194 |
+
"step": 3120
|
| 2195 |
+
},
|
| 2196 |
+
{
|
| 2197 |
+
"epoch": 1.0,
|
| 2198 |
+
"step": 3125,
|
| 2199 |
+
"total_flos": 2477163648385024.0,
|
| 2200 |
+
"train_loss": 0.20598802658081056,
|
| 2201 |
+
"train_runtime": 35266.4791,
|
| 2202 |
+
"train_samples_per_second": 5.671,
|
| 2203 |
+
"train_steps_per_second": 0.089
|
| 2204 |
+
}
|
| 2205 |
+
],
|
| 2206 |
+
"logging_steps": 10,
|
| 2207 |
+
"max_steps": 3125,
|
| 2208 |
+
"num_input_tokens_seen": 0,
|
| 2209 |
+
"num_train_epochs": 1,
|
| 2210 |
+
"save_steps": 500,
|
| 2211 |
+
"stateful_callbacks": {
|
| 2212 |
+
"TrainerControl": {
|
| 2213 |
+
"args": {
|
| 2214 |
+
"should_epoch_stop": false,
|
| 2215 |
+
"should_evaluate": false,
|
| 2216 |
+
"should_log": false,
|
| 2217 |
+
"should_save": true,
|
| 2218 |
+
"should_training_stop": true
|
| 2219 |
+
},
|
| 2220 |
+
"attributes": {}
|
| 2221 |
+
}
|
| 2222 |
+
},
|
| 2223 |
+
"total_flos": 2477163648385024.0,
|
| 2224 |
+
"train_batch_size": 4,
|
| 2225 |
+
"trial_name": null,
|
| 2226 |
+
"trial_params": null
|
| 2227 |
+
}
|
checkpoints/GLM-4.6V-Flash-SFT/training_loss.png
ADDED
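The loss curve shipped as training_loss.png can be regenerated from the trainer_state.json above. A minimal sketch, not the release's own plotting script; the matplotlib usage and output filename are assumptions:

import json
import matplotlib.pyplot as plt

with open("checkpoints/GLM-4.6V-Flash-SFT/trainer_state.json") as f:
    state = json.load(f)

# Keep the per-step logging records; the final entry is a run summary
# without "grad_norm", so it is filtered out here.
logs = [e for e in state["log_history"] if "loss" in e and "grad_norm" in e]
plt.plot([e["step"] for e in logs], [e["loss"] for e in logs])
plt.xlabel("step")
plt.ylabel("training loss")
plt.savefig("training_loss.png")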
checkpoints/Gemma-4-E4B-it-SFT/all_results.json
ADDED
@@ -0,0 +1,8 @@
+{
+  "epoch": 1.0,
+  "total_flos": 1.0913057758773248e+16,
+  "train_loss": 0.7292402684783935,
+  "train_runtime": 30167.0559,
+  "train_samples_per_second": 6.63,
+  "train_steps_per_second": 0.104
+}
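A quick consistency check on the throughput figures above, illustrative arithmetic only: samples/sec divided by steps/sec recovers the effective global batch size, and runtime times steps/sec approximates the optimizer step count.

runtime_s = 30167.0559
samples_per_s = 6.63
steps_per_s = 0.104

print(round(samples_per_s / steps_per_s))  # 64     -> effective global batch size
print(round(runtime_s * steps_per_s))      # 3137   -> roughly one 3125-step epoch
print(round(runtime_s * samples_per_s))    # 200008 -> ~200k samples seen in the epoch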
checkpoints/Gemma-4-E4B-it-SFT/chat_template.jinja
ADDED
@@ -0,0 +1,263 @@
+{%- macro format_parameters(properties, required) -%}
+{%- set standard_keys = ['description', 'type', 'properties', 'required', 'nullable'] -%}
+{%- set ns = namespace(found_first=false) -%}
+{%- for key, value in properties | dictsort -%}
+{%- set add_comma = false -%}
+{%- if key not in standard_keys -%}
+{%- if ns.found_first %},{% endif -%}
+{%- set ns.found_first = true -%}
+{{ key }}:{
+{%- if value['description'] -%}
+description:<|"|>{{ value['description'] }}<|"|>
+{%- set add_comma = true -%}
+{%- endif -%}
+{%- if value['nullable'] %}
+{%- if add_comma %},{%- else -%} {%- set add_comma = true -%} {% endif -%}
+nullable:true
+{%- endif -%}
+{%- if value['type'] | upper == 'STRING' -%}
+{%- if value['enum'] -%}
+{%- if add_comma %},{%- else -%} {%- set add_comma = true -%} {% endif -%}
+enum:{{ format_argument(value['enum']) }}
+{%- endif -%}
+{%- elif value['type'] | upper == 'OBJECT' -%}
+,properties:{
+{%- if value['properties'] is defined and value['properties'] is mapping -%}
+{{- format_parameters(value['properties'], value['required'] | default([])) -}}
+{%- elif value is mapping -%}
+{{- format_parameters(value, value['required'] | default([])) -}}
+{%- endif -%}
+}
+{%- if value['required'] -%}
+,required:[
+{%- for item in value['required'] | default([]) -%}
+<|"|>{{- item -}}<|"|>
+{%- if not loop.last %},{% endif -%}
+{%- endfor -%}
+]
+{%- endif -%}
+{%- elif value['type'] | upper == 'ARRAY' -%}
+{%- if value['items'] is mapping and value['items'] -%}
+,items:{
+{%- set ns_items = namespace(found_first=false) -%}
+{%- for item_key, item_value in value['items'] | dictsort -%}
+{%- if item_value is not none -%}
+{%- if ns_items.found_first %},{% endif -%}
+{%- set ns_items.found_first = true -%}
+{%- if item_key == 'properties' -%}
+properties:{
+{%- if item_value is mapping -%}
+{{- format_parameters(item_value, value['items']['required'] | default([])) -}}
+{%- endif -%}
+}
+{%- elif item_key == 'required' -%}
+required:[
+{%- for req_item in item_value -%}
+<|"|>{{- req_item -}}<|"|>
+{%- if not loop.last %},{% endif -%}
+{%- endfor -%}
+]
+{%- elif item_key == 'type' -%}
+{%- if item_value is string -%}
+type:{{ format_argument(item_value | upper) }}
+{%- else -%}
+type:{{ format_argument(item_value | map('upper') | list) }}
+{%- endif -%}
+{%- else -%}
+{{ item_key }}:{{ format_argument(item_value) }}
+{%- endif -%}
+{%- endif -%}
+{%- endfor -%}
+}
+{%- endif -%}
+{%- endif -%}
+{%- if add_comma %},{%- else -%} {%- set add_comma = true -%} {% endif -%}
+type:<|"|>{{ value['type'] | upper }}<|"|>}
+{%- endif -%}
+{%- endfor -%}
+{%- endmacro -%}
+{%- macro format_function_declaration(tool_data) -%}
+declaration:{{- tool_data['function']['name'] -}}{description:<|"|>{{- tool_data['function']['description'] -}}<|"|>
+{%- set params = tool_data['function']['parameters'] -%}
+{%- if params -%}
+,parameters:{
+{%- if params['properties'] -%}
+properties:{ {{- format_parameters(params['properties'], params['required']) -}} },
+{%- endif -%}
+{%- if params['required'] -%}
+required:[
+{%- for item in params['required'] -%}
+<|"|>{{- item -}}<|"|>
+{{- ',' if not loop.last -}}
+{%- endfor -%}
+],
+{%- endif -%}
+{%- if params['type'] -%}
+type:<|"|>{{- params['type'] | upper -}}<|"|>}
+{%- endif -%}
+{%- endif -%}
+{%- if 'response' in tool_data['function'] -%}
+{%- set response_declaration = tool_data['function']['response'] -%}
+,response:{
+{%- if response_declaration['description'] -%}
+description:<|"|>{{- response_declaration['description'] -}}<|"|>,
+{%- endif -%}
+{%- if response_declaration['type'] | upper == 'OBJECT' -%}
+type:<|"|>{{- response_declaration['type'] | upper -}}<|"|>}
+{%- endif -%}
+{%- endif -%}
+}
+{%- endmacro -%}
+{%- macro format_argument(argument, escape_keys=True) -%}
+{%- if argument is string -%}
+{{- '<|"|>' + argument + '<|"|>' -}}
+{%- elif argument is boolean -%}
+{{- 'true' if argument else 'false' -}}
+{%- elif argument is mapping -%}
+{{- '{' -}}
+{%- set ns = namespace(found_first=false) -%}
+{%- for key, value in argument | dictsort -%}
+{%- if ns.found_first %},{% endif -%}
+{%- set ns.found_first = true -%}
+{%- if escape_keys -%}
+{{- '<|"|>' + key + '<|"|>' -}}
+{%- else -%}
+{{- key -}}
+{%- endif -%}
+:{{- format_argument(value, escape_keys=escape_keys) -}}
+{%- endfor -%}
+{{- '}' -}}
+{%- elif argument is sequence -%}
+{{- '[' -}}
+{%- for item in argument -%}
+{{- format_argument(item, escape_keys=escape_keys) -}}
+{%- if not loop.last %},{% endif -%}
+{%- endfor -%}
+{{- ']' -}}
+{%- else -%}
+{{- argument -}}
+{%- endif -%}
+{%- endmacro -%}
+{%- macro strip_thinking(text) -%}
+{%- set ns = namespace(result='') -%}
+{%- for part in text.split('<channel|>') -%}
+{%- if '<|channel>' in part -%}
+{%- set ns.result = ns.result + part.split('<|channel>')[0] -%}
+{%- else -%}
+{%- set ns.result = ns.result + part -%}
+{%- endif -%}
+{%- endfor -%}
+{{- ns.result | trim -}}
+{%- endmacro -%}
+
+{%- set ns = namespace(prev_message_type=None) -%}
+{%- set loop_messages = messages -%}
+{{ bos_token }}
+{#- Handle System/Tool Definitions Block -#}
+{%- if (enable_thinking is defined and enable_thinking) or tools or messages[0]['role'] in ['system', 'developer'] -%}
+{{- '<|turn>system\n' -}}
+
+{#- Inject Thinking token at the very top of the FIRST system turn -#}
+{%- if enable_thinking is defined and enable_thinking -%}
+{{- '<|think|>' -}}
+{%- set ns.prev_message_type = 'think' -%}
+{%- endif -%}
+
+{%- if messages[0]['role'] in ['system', 'developer'] -%}
+{{- messages[0]['content'] | trim -}}
+{%- set loop_messages = messages[1:] -%}
+{%- endif -%}
+
+{%- if tools -%}
+{%- for tool in tools %}
+{{- '<|tool>' -}}
+{{- format_function_declaration(tool) | trim -}}
+{{- '<tool|>' -}}
+{%- endfor %}
+{%- set ns.prev_message_type = 'tool' -%}
+{%- endif -%}
+
+{{- '<turn|>\n' -}}
+{%- endif %}
+
+{#- Loop through messages -#}
+{%- for message in loop_messages -%}
+{%- set ns.prev_message_type = None -%}
+{%- set role = 'model' if message['role'] == 'assistant' else message['role'] -%}
+{{- '<|turn>' + role + '\n' }}
+
+{%- if message['tool_calls'] -%}
+{%- for tool_call in message['tool_calls'] -%}
+{%- set function = tool_call['function'] -%}
+{{- '<|tool_call>call:' + function['name'] + '{' -}}
+{%- if function['arguments'] is mapping -%}
+{%- set ns_args = namespace(found_first=false) -%}
+{%- for key, value in function['arguments'] | dictsort -%}
+{%- if ns_args.found_first %},{% endif -%}
+{%- set ns_args.found_first = true -%}
+{{- key -}}:{{- format_argument(value, escape_keys=False) -}}
+{%- endfor -%}
+{%- elif function['arguments'] is string -%}
+{{- function['arguments'] -}}
+{%- endif -%}
+{{- '}<tool_call|>' -}}
+{%- endfor -%}
+{%- set ns.prev_message_type = 'tool_call' -%}
+{%- endif -%}
+
+{%- if message['tool_responses'] -%}
+{#- Tool Response handling -#}
+{%- for tool_response in message['tool_responses'] -%}
+{{- '<|tool_response>' -}}
+{%- if tool_response['response'] is mapping -%}
+{{- 'response:' + tool_response['name'] | default('unknown') + '{' -}}
+{%- for key, value in tool_response['response'] | dictsort -%}
+{{- key -}}:{{- format_argument(value, escape_keys=False) -}}
+{%- if not loop.last %},{% endif -%}
+{%- endfor -%}
+{{- '}' -}}
+{%- else -%}
+{{- 'response:' + tool_response['name'] | default('unknown') + '{value:' + format_argument(tool_response['response'], escape_keys=False) + '}' -}}
+{%- endif -%}
+{{- '<tool_response|>' -}}
+{%- endfor -%}
+{%- set ns.prev_message_type = 'tool_response' -%}
+{%- endif -%}
+
+{%- if message['content'] is string -%}
+{%- if role == 'model' -%}
+{{- strip_thinking(message['content']) -}}
+{%- else -%}
+{{- message['content'] | trim -}}
+{%- endif -%}
+{%- elif message['content'] is sequence -%}
+{%- for item in message['content'] -%}
+{%- if item['type'] == 'text' -%}
+{%- if role == 'model' -%}
+{{- strip_thinking(item['text']) -}}
+{%- else -%}
+{{- item['text'] | trim -}}
+{%- endif -%}
+{%- elif item['type'] == 'image' -%}
+{{- '\n\n<|image|>\n\n' -}}
+{%- set ns.prev_message_type = 'image' -%}
+{%- elif item['type'] == 'audio' -%}
+{{- '<|audio|>' -}}
+{%- set ns.prev_message_type = 'audio' -%}
+{%- elif item['type'] == 'video' -%}
+{{- '\n\n<|video|>\n\n' -}}
+{%- set ns.prev_message_type = 'video' -%}
+{%- endif -%}
+{%- endfor -%}
+{%- endif -%}
+
+{%- if not (message['tool_responses'] and not message['content']) -%}
+{{- '<turn|>\n' -}}
+{%- endif -%}
+{%- endfor -%}
+
+{%- if add_generation_prompt -%}
+{%- if ns.prev_message_type != 'tool_response' -%}
+{{- '<|turn>model\n' -}}
+{%- endif -%}
+{%- endif -%}
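The template above is exercised through the standard apply_chat_template API. A sketch under that assumption; the checkpoint path is the one added in this PR, the messages are made up, and a transformers build that recognizes this checkpoint is assumed:

from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("checkpoints/Gemma-4-E4B-it-SFT")

messages = [
    {"role": "system", "content": "You are a UAV tracking assistant."},  # invented text
    {"role": "user", "content": [
        {"type": "image"},
        {"type": "text", "text": "Predict the next five waypoints."},
    ]},
]

# Each turn is wrapped in <|turn>...<turn|>; with add_generation_prompt=True
# the string ends with an open '<|turn>model' header for the model to continue.
prompt = tok.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
print(prompt)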
checkpoints/Gemma-4-E4B-it-SFT/config.json
ADDED
@@ -0,0 +1,199 @@
+{
+  "architectures": [
+    "Gemma4ForConditionalGeneration"
+  ],
+  "audio_config": {
+    "_name_or_path": "",
+    "architectures": null,
+    "attention_chunk_size": 12,
+    "attention_context_left": 13,
+    "attention_context_right": 0,
+    "attention_invalid_logits_value": -1000000000.0,
+    "attention_logit_cap": 50.0,
+    "chunk_size_feed_forward": 0,
+    "conv_kernel_size": 5,
+    "dtype": "bfloat16",
+    "gradient_clipping": 10000000000.0,
+    "hidden_act": "silu",
+    "hidden_size": 1024,
+    "id2label": {
+      "0": "LABEL_0",
+      "1": "LABEL_1"
+    },
+    "initializer_range": 0.02,
+    "is_encoder_decoder": false,
+    "label2id": {
+      "LABEL_0": 0,
+      "LABEL_1": 1
+    },
+    "model_type": "gemma4_audio",
+    "num_attention_heads": 8,
+    "num_hidden_layers": 12,
+    "output_attentions": false,
+    "output_hidden_states": false,
+    "output_proj_dims": 1536,
+    "problem_type": null,
+    "residual_weight": 0.5,
+    "return_dict": true,
+    "rms_norm_eps": 1e-06,
+    "subsampling_conv_channels": [
+      128,
+      32
+    ],
+    "use_clipped_linears": true
+  },
+  "audio_token_id": 258881,
+  "boa_token_id": 256000,
+  "boi_token_id": 255999,
+  "bos_token_id": 2,
+  "dtype": "bfloat16",
+  "eoa_token_id": 258883,
+  "eoa_token_index": 258883,
+  "eoi_token_id": 258882,
+  "eos_token_id": 106,
+  "hidden_size": 2560,
+  "image_token_id": 258880,
+  "initializer_range": 0.02,
+  "model_type": "gemma4",
+  "pad_token_id": 0,
+  "text_config": {
+    "attention_bias": false,
+    "attention_dropout": 0.0,
+    "attention_k_eq_v": false,
+    "bos_token_id": 2,
+    "dtype": "bfloat16",
+    "enable_moe_block": false,
+    "eos_token_id": 1,
+    "expert_intermediate_size": null,
+    "final_logit_softcapping": 30.0,
+    "global_head_dim": 512,
+    "head_dim": 256,
+    "hidden_activation": "gelu_pytorch_tanh",
+    "hidden_size": 2560,
+    "hidden_size_per_layer_input": 256,
+    "initializer_range": 0.02,
+    "intermediate_size": 10240,
+    "layer_types": [
+      "sliding_attention",
+      "sliding_attention",
+      "sliding_attention",
+      "sliding_attention",
+      "sliding_attention",
+      "full_attention",
+      "sliding_attention",
+      "sliding_attention",
+      "sliding_attention",
+      "sliding_attention",
+      "sliding_attention",
+      "full_attention",
+      "sliding_attention",
+      "sliding_attention",
+      "sliding_attention",
+      "sliding_attention",
+      "sliding_attention",
+      "full_attention",
+      "sliding_attention",
+      "sliding_attention",
+      "sliding_attention",
+      "sliding_attention",
+      "sliding_attention",
+      "full_attention",
+      "sliding_attention",
+      "sliding_attention",
+      "sliding_attention",
+      "sliding_attention",
+      "sliding_attention",
+      "full_attention",
+      "sliding_attention",
+      "sliding_attention",
+      "sliding_attention",
+      "sliding_attention",
+      "sliding_attention",
+      "full_attention",
+      "sliding_attention",
+      "sliding_attention",
+      "sliding_attention",
+      "sliding_attention",
+      "sliding_attention",
+      "full_attention"
+    ],
+    "max_position_embeddings": 131072,
+    "model_type": "gemma4_text",
+    "moe_intermediate_size": null,
+    "num_attention_heads": 8,
+    "num_experts": null,
+    "num_global_key_value_heads": null,
+    "num_hidden_layers": 42,
+    "num_key_value_heads": 2,
+    "num_kv_shared_layers": 18,
+    "pad_token_id": 0,
+    "rms_norm_eps": 1e-06,
+    "rope_parameters": {
+      "full_attention": {
+        "partial_rotary_factor": 0.25,
+        "rope_theta": 1000000.0,
+        "rope_type": "proportional"
+      },
+      "sliding_attention": {
+        "rope_theta": 10000.0,
+        "rope_type": "default"
+      }
+    },
+    "sliding_window": 512,
+    "tie_word_embeddings": true,
+    "top_k_experts": null,
+    "use_bidirectional_attention": null,
+    "use_cache": false,
+    "use_double_wide_mlp": false,
+    "vocab_size": 262144,
+    "vocab_size_per_layer_input": 262144
+  },
+  "tie_word_embeddings": true,
+  "transformers_version": "5.5.3",
+  "use_cache": false,
+  "video_token_id": 258884,
+  "vision_config": {
+    "_name_or_path": "",
+    "architectures": null,
+    "attention_bias": false,
+    "attention_dropout": 0.0,
+    "chunk_size_feed_forward": 0,
+    "default_output_length": 280,
+    "dtype": "bfloat16",
+    "global_head_dim": 64,
+    "head_dim": 64,
+    "hidden_activation": "gelu_pytorch_tanh",
+    "hidden_size": 768,
+    "id2label": {
+      "0": "LABEL_0",
+      "1": "LABEL_1"
+    },
+    "initializer_range": 0.02,
+    "intermediate_size": 3072,
+    "is_encoder_decoder": false,
+    "label2id": {
+      "LABEL_0": 0,
+      "LABEL_1": 1
+    },
+    "max_position_embeddings": 131072,
+    "model_type": "gemma4_vision",
+    "num_attention_heads": 12,
+    "num_hidden_layers": 16,
+    "num_key_value_heads": 12,
+    "output_attentions": false,
+    "output_hidden_states": false,
+    "patch_size": 16,
+    "pooling_kernel_size": 3,
+    "position_embedding_size": 10240,
+    "problem_type": null,
+    "return_dict": true,
+    "rms_norm_eps": 1e-06,
+    "rope_parameters": {
+      "rope_theta": 100.0,
+      "rope_type": "default"
+    },
+    "standardize": false,
+    "use_clipped_linears": true
+  },
+  "vision_soft_tokens_per_image": 280
+}
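The 42-entry layer_types list in text_config follows a strict 5:1 schedule, five sliding-window layers then one full-attention layer. A short check of that reading against the list above, for illustration only:

layer_types = ["full_attention" if (i + 1) % 6 == 0 else "sliding_attention"
               for i in range(42)]
assert layer_types.count("full_attention") == 7             # layers 6, 12, ..., 42
assert all(t == "full_attention" for t in layer_types[5::6])
print("layer pattern matches config.json")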
checkpoints/Gemma-4-E4B-it-SFT/eval_results_job_gemma_gemma_4_e4b_20260430_011024.json
ADDED
@@ -0,0 +1,56 @@
+{
+  "mae_dx": 0.48666724137931033,
+  "rmse_dx": 1.0707492462177417,
+  "mae_dy": 0.3855034482758621,
+  "rmse_dy": 0.7843001492655289,
+  "mae_dz": 0.04997413793103449,
+  "rmse_dz": 0.156602120477122,
+  "mae_dpitch": 0.9934068965517242,
+  "rmse_dpitch": 1.7330746049166195,
+  "mae_dyaw": 2.2219862068965517,
+  "rmse_dyaw": 3.906024586323736,
+  "mae_droll": 0.0,
+  "rmse_droll": 0.0,
+  "mae_overall": 0.6895896551724138,
+  "mae_position": 0.30738160919540225,
+  "mae_rotation": 1.0717977011494253,
+  "rmse_overall": 1.8278735619896387,
+  "wp1_euc_mae": 0.2665493636964831,
+  "wp1_euc_median": 0.18,
+  "wp2_euc_mae": 0.5012943438070621,
+  "wp2_euc_median": 0.31144823004794875,
+  "wp3_euc_mae": 0.7271333853911885,
+  "wp3_euc_median": 0.48,
+  "wp4_euc_mae": 0.958032444080531,
+  "wp4_euc_median": 0.6351377754492935,
+  "wp5_euc_mae": 1.1876023185914943,
+  "wp5_euc_median": 0.7778817364281356,
+  "euclidean_mae": 0.7281223711133517,
+  "ADE": 0.7281223711133519,
+  "FDE": 1.1876023185914943,
+  "ADE_median": 0.49122803576716423,
+  "FDE_median": 0.7778817364281356,
+  "SR@0.5m": 0.5736206896551724,
+  "SR@1.0m": 0.783448275862069,
+  "SR@2.0m": 0.9222413793103448,
+  "SR@5.0m": 0.9898275862068966,
+  "TrajSR@1.0m": 0.5887931034482758,
+  "TrajSR@2.0m": 0.8353448275862069,
+  "TrajSR@5.0m": 0.9724137931034482,
+  "RotAcc@1.0deg": 0.39948275862068966,
+  "RotAcc@5.0deg": 0.83,
+  "RotAcc@10.0deg": 0.9762068965517241,
+  "wp1_rot_mae": 1.8561397413473146,
+  "wp2_rot_mae": 2.249132034716281,
+  "wp3_rot_mae": 2.6355453352548355,
+  "wp4_rot_mae": 3.048629056478642,
+  "wp5_rot_mae": 3.45811827126434,
+  "rotation_euc_mae": 2.6495128878122824,
+  "parse_failure_rate": 0.0,
+  "parse_success_rate": 1.0,
+  "valid_samples": 1160,
+  "total_samples": 1160,
+  "parse_failures": 0,
+  "inference_engine": "vllm",
+  "vllm_version": "0.19.0"
+}
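The aggregate trajectory numbers above are internally consistent with the usual definitions of these metrics, assuming ADE is the Euclidean waypoint error averaged over the five predicted waypoints and FDE the error at the final waypoint. A worked check against the reported values:

wp_euc_mae = [
    0.2665493636964831,  # wp1_euc_mae
    0.5012943438070621,  # wp2_euc_mae
    0.7271333853911885,  # wp3_euc_mae
    0.958032444080531,   # wp4_euc_mae
    1.1876023185914943,  # wp5_euc_mae
]
ade = sum(wp_euc_mae) / len(wp_euc_mae)
print(ade)             # 0.7281223711133518 -> matches "ADE" / "euclidean_mae"
print(wp_euc_mae[-1])  # 1.1876023185914943 -> matches "FDE"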
checkpoints/Gemma-4-E4B-it-SFT/generation_config.json
ADDED
@@ -0,0 +1,15 @@
+{
+  "bos_token_id": 2,
+  "do_sample": true,
+  "eos_token_id": [
+    106,
+    1,
+    106,
+    50
+  ],
+  "pad_token_id": 0,
+  "temperature": 1.0,
+  "top_k": 64,
+  "top_p": 0.95,
+  "transformers_version": "5.5.3"
+}
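Restated as explicit generate() arguments, the sampling settings above look as follows. A sketch only: the concrete model class for this checkpoint is not confirmed here, so the generic Auto* loaders are used, and the prompt and max_new_tokens are illustrative.

from transformers import AutoModelForCausalLM, AutoTokenizer

ckpt = "checkpoints/Gemma-4-E4B-it-SFT"
tok = AutoTokenizer.from_pretrained(ckpt)
model = AutoModelForCausalLM.from_pretrained(ckpt)  # assumption: a causal-LM head suffices for text-only use

inputs = tok("Predict the next waypoint:", return_tensors="pt")
out = model.generate(
    **inputs,
    do_sample=True,     # "do_sample": true
    temperature=1.0,
    top_k=64,
    top_p=0.95,
    max_new_tokens=64,  # illustrative; not part of generation_config.json
)
print(tok.decode(out[0], skip_special_tokens=True))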
checkpoints/Gemma-4-E4B-it-SFT/model.safetensors
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:fa115532595f57272ed0b16337a23de6762ffa60ab858147f5f51f1cff34105b
+size 15992595884
checkpoints/Gemma-4-E4B-it-SFT/processor_config.json
ADDED
@@ -0,0 +1,75 @@
+{
+  "audio_ms_per_token": 40,
+  "audio_seq_length": 750,
+  "feature_extractor": {
+    "dither": 0.0,
+    "feature_extractor_type": "Gemma4AudioFeatureExtractor",
+    "feature_size": 128,
+    "fft_length": 512,
+    "fft_overdrive": false,
+    "frame_length": 320,
+    "hop_length": 160,
+    "input_scale_factor": 1.0,
+    "max_frequency": 8000.0,
+    "mel_floor": 0.001,
+    "min_frequency": 0.0,
+    "padding_side": "right",
+    "padding_value": 0.0,
+    "per_bin_mean": null,
+    "per_bin_stddev": null,
+    "preemphasis": 0.0,
+    "preemphasis_htk_flavor": true,
+    "return_attention_mask": true,
+    "sampling_rate": 16000
+  },
+  "image_processor": {
+    "do_convert_rgb": true,
+    "do_normalize": false,
+    "do_rescale": true,
+    "do_resize": true,
+    "image_mean": [
+      0.0,
+      0.0,
+      0.0
+    ],
+    "image_processor_type": "Gemma4ImageProcessor",
+    "image_seq_length": 280,
+    "image_std": [
+      1.0,
+      1.0,
+      1.0
+    ],
+    "max_soft_tokens": 280,
+    "patch_size": 16,
+    "pooling_kernel_size": 3,
+    "resample": 3,
+    "rescale_factor": 0.00392156862745098
+  },
+  "image_seq_length": 280,
+  "processor_class": "Gemma4Processor",
+  "video_processor": {
+    "do_convert_rgb": true,
+    "do_normalize": true,
+    "do_rescale": true,
+    "do_resize": true,
+    "do_sample_frames": true,
+    "image_mean": [
+      0.0,
+      0.0,
+      0.0
+    ],
+    "image_std": [
+      1.0,
+      1.0,
+      1.0
+    ],
+    "max_soft_tokens": 70,
+    "num_frames": 32,
+    "patch_size": 16,
+    "pooling_kernel_size": 3,
+    "resample": 3,
+    "rescale_factor": 0.00392156862745098,
+    "return_metadata": false,
+    "video_processor_type": "Gemma4VideoProcessor"
+  }
+}
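The audio settings above fix the audio budget. Illustrative arithmetic only: 40 ms per soft token times 750 tokens caps an input at 30 s, and with hop_length 160 at a 16 kHz sampling rate each mel frame covers 10 ms, so one soft token spans four frames.

print(40 * 750 / 1000)     # 30.0 -> at most 30 s of audio per request
print(160 / 16000 * 1000)  # 10.0 -> ms per mel frame, i.e. 4 frames per 40 ms token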
checkpoints/Gemma-4-E4B-it-SFT/tokenizer.json
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cc8d3a0ce36466ccc1278bf987df5f71db1719b9ca6b4118264f45cb627bfe0f
+size 32169626
checkpoints/Gemma-4-E4B-it-SFT/tokenizer_config.json
ADDED
@@ -0,0 +1,96 @@
+{
+  "audio_token": "<|audio|>",
+  "backend": "tokenizers",
+  "boa_token": "<|audio>",
+  "boi_token": "<|image>",
+  "bos_token": "<bos>",
+  "eoa_token": "<audio|>",
+  "eoc_token": "<channel|>",
+  "eoi_token": "<image|>",
+  "eos_token": "<turn|>",
+  "eot_token": "<turn|>",
+  "escape_token": "<|\"|>",
+  "etc_token": "<tool_call|>",
+  "etd_token": "<tool|>",
+  "etr_token": "<tool_response|>",
+  "extra_special_tokens": [
+    "<|video|>"
+  ],
+  "image_token": "<|image|>",
+  "is_local": true,
+  "mask_token": "<mask>",
+  "model_max_length": 1000000000000000019884624838656,
+  "model_specific_special_tokens": {
+    "audio_token": "<|audio|>",
+    "boa_token": "<|audio>",
+    "boi_token": "<|image>",
+    "eoa_token": "<audio|>",
+    "eoc_token": "<channel|>",
+    "eoi_token": "<image|>",
+    "eot_token": "<turn|>",
+    "escape_token": "<|\"|>",
+    "etc_token": "<tool_call|>",
+    "etd_token": "<tool|>",
+    "etr_token": "<tool_response|>",
+    "image_token": "<|image|>",
+    "soc_token": "<|channel>",
+    "sot_token": "<|turn>",
+    "stc_token": "<|tool_call>",
+    "std_token": "<|tool>",
+    "str_token": "<|tool_response>",
+    "think_token": "<|think|>"
+  },
+  "pad_token": "<pad>",
+  "padding_side": "right",
+  "processor_class": "Gemma4Processor",
+  "response_schema": {
+    "properties": {
+      "content": {
+        "type": "string"
+      },
+      "role": {
+        "const": "assistant"
+      },
+      "thinking": {
+        "type": "string"
+      },
+      "tool_calls": {
+        "items": {
+          "properties": {
+            "function": {
+              "properties": {
+                "arguments": {
+                  "additionalProperties": {},
+                  "type": "object",
+                  "x-parser": "gemma4-tool-call"
+                },
+                "name": {
+                  "type": "string"
+                }
+              },
+              "type": "object",
+              "x-regex": "call\\:(?P<name>\\w+)(?P<arguments>\\{.*\\})"
+            },
+            "type": {
+              "const": "function"
+            }
+          },
+          "type": "object"
+        },
+        "type": "array",
+        "x-regex-iterator": "<\\|tool_call>(.*?)<tool_call\\|>"
+      }
+    },
+    "type": "object",
+    "x-regex": "(\\<\\|channel\\>thought\\n(?P<thinking>.*?)\\<channel\\|\\>)?(?P<content>(?:(?!\\<\\|tool_call\\>)(?!\\<turn\\|\\>).)+)?(?P<tool_calls>\\<\\|tool_call\\>.*\\<tool_call\\|\\>)?(?:\\<turn\\|\\>)?"
+  },
+  "soc_token": "<|channel>",
+  "sot_token": "<|turn>",
+  "split_special_tokens": false,
+  "stc_token": "<|tool_call>",
+  "std_token": "<|tool>",
+  "str_token": "<|tool_response>",
+  "think_token": "<|think|>",
+  "tokenizer_class": "GemmaTokenizer",
+  "unk_token": "<unk>"
+}
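tokenizer_config.json embeds, in response_schema, the regexes used to pull tool calls back out of a raw model turn. A sketch applying those two patterns to a made-up completion; the function name and arguments are invented for illustration:

import re

completion = "<|tool_call>call:set_waypoint{x:1.2,y:0.4,z:0.1}<tool_call|>"

# "x-regex-iterator" from the schema: one match per emitted tool call.
for chunk in re.finditer(r"<\|tool_call>(.*?)<tool_call\|>", completion):
    # "x-regex" for a single call: its name plus brace-delimited arguments.
    m = re.match(r"call\:(?P<name>\w+)(?P<arguments>\{.*\})", chunk.group(1))
    print(m.group("name"), m.group("arguments"))
    # -> set_waypoint {x:1.2,y:0.4,z:0.1}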
checkpoints/Gemma-4-E4B-it-SFT/train_results.json
ADDED
@@ -0,0 +1,8 @@
+{
+  "epoch": 1.0,
+  "total_flos": 1.0913057758773248e+16,
+  "train_loss": 0.7292402684783935,
+  "train_runtime": 30167.0559,
+  "train_samples_per_second": 6.63,
+  "train_steps_per_second": 0.104
+}
checkpoints/Gemma-4-E4B-it-SFT/trainer_state.json
ADDED
@@ -0,0 +1,2227 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
trainer_state.json (new file; every row in this span of the diff is a `+` addition, so the viewer's line-number and `+` gutters are collapsed below). The first 11 lines give the run header:

    {
      "best_global_step": null,
      "best_metric": null,
      "best_model_checkpoint": null,
      "epoch": 1.0,
      "eval_steps": 500,
      "global_step": 3125,
      "is_hyper_param_search": false,
      "is_local_process_zero": true,
      "is_world_process_zero": true,
      "log_history": [

The rest of this span is the `log_history` array: one record every 10 optimizer steps, each logging `epoch`, `grad_norm`, `learning_rate`, `loss`, and `step`. The flattened diff rows are reconstructed as the table below.
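Before the table, a note on consuming these files: the sketch below is a minimal example, not part of the release scripts. It assumes only the `trainer_state.json` layout shown above; the path is a hypothetical placeholder, and the output mirrors the `training_loss.png` artifact shipped next to each checkpoint.

```python
import json
import matplotlib.pyplot as plt

# Hypothetical placeholder path; point it at any trainer_state.json in this release.
PATH = "checkpoints/MODEL-SFT/trainer_state.json"

with open(PATH) as f:
    state = json.load(f)

# Training records carry a "loss" key; evaluation records, if present, carry eval_* keys.
train_log = [rec for rec in state["log_history"] if "loss" in rec]
steps = [rec["step"] for rec in train_log]
losses = [rec["loss"] for rec in train_log]

plt.plot(steps, losses)
plt.xlabel("step")
plt.ylabel("training loss")
plt.title(f"epoch={state['epoch']}, global_step={state['global_step']}")
plt.savefig("training_loss.png")
```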
| epoch | grad_norm | learning_rate | loss | step |
|---|---|---|---|---|
| 0.0032 | 366.0841096744857 | 1.437699680511182e-07 | 23.85431823730469 | 10 |
| 0.0064 | 367.47333882445946 | 3.0351437699680514e-07 | 23.65589599609375 | 20 |
| 0.0096 | 367.96579270464326 | 4.6325878594249205e-07 | 22.780029296875 | 30 |
| 0.0128 | 332.5732884154056 | 6.230031948881789e-07 | 20.279689025878906 | 40 |
| 0.016 | 219.53674756423746 | 7.82747603833866e-07 | 15.498806762695313 | 50 |
| 0.0192 | 156.1487544830451 | 9.424920127795528e-07 | 10.388201904296874 | 60 |
| 0.0224 | 37.96869040917498 | 1.1022364217252397e-06 | 3.7560958862304688 | 70 |
| 0.0256 | 16.783464772614202 | 1.2619808306709266e-06 | 2.033830261230469 | 80 |
| 0.0288 | 5.438256169634593 | 1.4217252396166134e-06 | 1.0431390762329102 | 90 |
| 0.032 | 3.6935246150045775 | 1.5814696485623005e-06 | 0.8069572448730469 | 100 |
| 0.0352 | 9.218312544625562 | 1.7412140575079875e-06 | 0.7057615280151367 | 110 |
| 0.0384 | 5.394484238866305 | 1.9009584664536742e-06 | 0.6301750183105469 | 120 |
| 0.0416 | 6.577481237217732 | 2.060702875399361e-06 | 0.5898516654968262 | 130 |
| 0.0448 | 3.4158074483641068 | 2.220447284345048e-06 | 0.5524418830871582 | 140 |
| 0.048 | 4.032046521040006 | 2.380191693290735e-06 | 0.5317594051361084 | 150 |
| 0.0512 | 5.468634675306576 | 2.539936102236422e-06 | 0.5184277534484864 | 160 |
| 0.0544 | 3.4313124951156424 | 2.699680511182109e-06 | 0.5204483985900878 | 170 |
| 0.0576 | 5.13400179254009 | 2.8594249201277955e-06 | 0.5058025360107422 | 180 |
| 0.0608 | 5.9183424216837786 | 3.0191693290734825e-06 | 0.5073411941528321 | 190 |
| 0.064 | 5.625073986187664 | 3.17891373801917e-06 | 0.5000103950500489 | 200 |
| 0.0672 | 5.050603467051007 | 3.3386581469648564e-06 | 0.488192081451416 | 210 |
| 0.0704 | 11.776866822937645 | 3.4984025559105434e-06 | 0.48699202537536623 | 220 |
| 0.0736 | 7.438900018795585 | 3.6581469648562303e-06 | 0.4820102691650391 | 230 |
| 0.0768 | 4.3491840646532065 | 3.817891373801918e-06 | 0.47640199661254884 | 240 |
| 0.08 | 3.472565426233091 | 3.977635782747604e-06 | 0.4729574203491211 | 250 |
| 0.0832 | 3.1912744148161942 | 4.137380191693291e-06 | 0.4786433219909668 | 260 |
| 0.0864 | 3.9698013424470777 | 4.297124600638978e-06 | 0.4748369216918945 | 270 |
| 0.0896 | 8.11949393489321 | 4.456869009584665e-06 | 0.4681865692138672 | 280 |
| 0.0928 | 4.7349566199381234 | 4.616613418530352e-06 | 0.46743001937866213 | 290 |
| 0.096 | 4.756427284033883 | 4.776357827476039e-06 | 0.46964178085327146 | 300 |
| 0.0992 | 4.86570605379029 | 4.936102236421725e-06 | 0.45612516403198244 | 310 |
| 0.1024 | 5.762654788054032 | 4.999943833158769e-06 | 0.45009474754333495 | 320 |
| 0.1056 | 3.501477346053355 | 4.999600600490783e-06 | 0.4523477554321289 | 330 |
| 0.1088 | 7.957279740713588 | 4.9989453817439345e-06 | 0.44190473556518556 | 340 |
| 0.112 | 7.660308885793361 | 4.997978258698942e-06 | 0.43758931159973147 | 350 |
| 0.1152 | 5.8839479464224205 | 4.996699352066659e-06 | 0.4371060371398926 | 360 |
| 0.1184 | 3.452842882877267 | 4.995108821473014e-06 | 0.42999753952026365 | 370 |
| 0.1216 | 4.825810317520427 | 4.993206865439084e-06 | 0.4285894393920898 | 380 |
| 0.1248 | 5.379766821254966 | 4.990993721356317e-06 | 0.42139811515808107 | 390 |
| 0.128 | 4.854730410799869 | 4.988469665456901e-06 | 0.42040281295776366 | 400 |
| 0.1312 | 4.6616615938661745 | 4.985635012779288e-06 | 0.4207456588745117 | 410 |
| 0.1344 | 4.5341296475975605 | 4.98249011712887e-06 | 0.414472770690918 | 420 |
| 0.1376 | 5.217437981869656 | 4.979035371033824e-06 | 0.41441006660461427 | 430 |
| 0.1408 | 3.561516924716779 | 4.975271205696115e-06 | 0.40767755508422854 | 440 |
| 0.144 | 3.815692337476438 | 4.971198090937671e-06 | 0.3997596263885498 | 450 |
| 0.1472 | 4.559242371997167 | 4.966816535141756e-06 | 0.39360842704772947 | 460 |
| 0.1504 | 3.432229350472061 | 4.9621270851895035e-06 | 0.40289998054504395 | 470 |
| 0.1536 | 5.375227134041046 | 4.957130326391662e-06 | 0.3982266664505005 | 480 |
| 0.1568 | 5.539585521677851 | 4.951826882415544e-06 | 0.39270691871643065 | 490 |
| 0.16 | 3.4147092253345743 | 4.946217415207177e-06 | 0.3853750705718994 | 500 |
| 0.1632 | 4.444175842440995 | 4.940302624908689e-06 | 0.38694162368774415 | 510 |
| 0.1664 | 3.3493207902303475 | 4.934083249770912e-06 | 0.3797153949737549 | 520 |
| 0.1696 | 3.0499194254019097 | 4.927560066061251e-06 | 0.38063654899597166 | 530 |
| 0.1728 | 3.141871281336489 | 4.920733887966783e-06 | 0.39005699157714846 | 540 |
| 0.176 | 3.979297184951908 | 4.913605567492636e-06 | 0.38013472557067873 | 550 |
| 0.1792 | 3.7669251986704113 | 4.906175994355656e-06 | 0.37832577228546144 | 560 |
| 0.1824 | 2.983798431857085 | 4.898446095873345e-06 | 0.38150479793548586 | 570 |
| 0.1856 | 3.657787030439589 | 4.890416836848128e-06 | 0.3775670528411865 | 580 |
| 0.1888 | 3.551048022748126 | 4.882089219446925e-06 | 0.37199065685272215 | 590 |
| 0.192 | 4.750977601329729 | 4.873464283076074e-06 | 0.3790221452713013 | 600 |
| 0.1952 | 7.684545118387627 | 4.864543104251587e-06 | 0.37508673667907716 | 610 |
| 0.1984 | 5.872575231845199 | 4.855326796464798e-06 | 0.3811868906021118 | 620 |
| 0.2016 | 3.9960144706794316 | 4.8458165100433725e-06 | 0.37326750755310056 | 630 |
| 0.2048 | 3.9998452581157657 | 4.836013432007738e-06 | 0.3709099769592285 | 640 |
| 0.208 | 2.6973135018594343 | 4.825918785922921e-06 | 0.3728507995605469 | 650 |
| 0.2112 | 4.478756132604264 | 4.8155338317458315e-06 | 0.36782591342926024 | 660 |
| 0.2144 | 2.5620662799375378 | 4.804859865668002e-06 | 0.36416780948638916 | 670 |
| 0.2176 | 2.9398359151969884 | 4.793898219953804e-06 | 0.36772732734680175 | 680 |
| 0.2208 | 3.404020172068192 | 4.782650262774164e-06 | 0.3651688575744629 | 690 |
| 0.224 | 2.588678061474319 | 4.7711173980357886e-06 | 0.3649880409240723 | 700 |
| 0.2272 | 3.5390276900279773 | 4.759301065205947e-06 | 0.3612825870513916 | 710 |
| 0.2304 | 3.8670986814196473 | 4.7472027391328e-06 | 0.3657612085342407 | 720 |
| 0.2336 | 3.0276354554801217 | 4.734823929861317e-06 | 0.36682844161987305 | 730 |
| 0.2368 | 5.205227283770371 | 4.722166182444801e-06 | 0.3605961322784424 | 740 |
| 0.24 | 3.1037248816470737 | 4.709231076752045e-06 | 0.3625338554382324 | 750 |
| 0.2432 | 3.827009314178272 | 4.696020227270142e-06 | 0.36273531913757323 | 760 |
| 0.2464 | 2.553717481812464 | 4.6825352829029705e-06 | 0.35740270614624026 | 770 |
| 0.2496 | 2.8273485176739563 | 4.668777926765392e-06 | 0.3613132953643799 | 780 |
| 0.2528 | 3.242165291552063 | 4.6547498759731725e-06 | 0.3525214672088623 | 790 |
| 0.256 | 2.607635187753211 | 4.6404528814286575e-06 | 0.3569283723831177 | 800 |
| 0.2592 | 3.2439792578606204 | 4.6258887276022425e-06 | 0.357681941986084 | 810 |
| 0.2624 | 2.9728036180938284 | 4.611059232309639e-06 | 0.3537192106246948 | 820 |
| 0.2656 | 2.556165398739607 | 4.595966246484986e-06 | 0.3528641700744629 | 830 |
| 0.2688 | 2.593548528246384 | 4.580611653949829e-06 | 0.3564203500747681 | 840 |
| 0.272 | 3.428440109671292 | 4.564997371177992e-06 | 0.3518026828765869 | 850 |
| 0.2752 | 4.993564850548027 | 4.54912534705637e-06 | 0.35079920291900635 | 860 |
| 0.2784 | 3.340510283095063 | 4.532997562641683e-06 | 0.3466078042984009 | 870 |
| 0.2816 | 2.6894615056191644 | 4.516616030913214e-06 | 0.3472653865814209 | 880 |
| 0.2848 | 1.891440124594712 | 4.499982796521556e-06 | 0.34483723640441893 | 890 |
| 0.288 | 3.223309297530686 | 4.48309993553341e-06 | 0.3444544553756714 | 900 |
| 0.2912 | 3.1032077209020468 | 4.465969555172468e-06 | 0.34571564197540283 | 910 |
| 0.2944 | 2.5407458837926638 | 4.448593793556391e-06 | 0.3534140110015869 | 920 |
| 0.2976 | 3.1253686498979123 | 4.430974819429954e-06 | 0.3445676326751709 | 930 |
| 0.3008 | 3.740083740472538 | 4.413114831894344e-06 | 0.33962287902832033 | 940 |
| 0.304 | 4.724023923665093 | 4.3950160601326865e-06 | 0.3363780498504639 | 950 |
| 0.3072 | 3.597276867142834 | 4.376680763131811e-06 | 0.3429840087890625 | 960 |
| 0.3104 | 2.97998267012516 | 4.358111229400296e-06 | 0.3470882177352905 | 970 |
| 0.3136 | 3.1405275857331856 | 4.33930977668283e-06 | 0.35235731601715087 | 980 |
| 0.3168 | 3.774584318253359 | 4.320278751670922e-06 | 0.3418004512786865 | 990 |
| 0.32 | 3.4325438208492605 | 4.301020529710009e-06 | 0.3456583499908447 | 1000 |
| 0.3232 | 3.1407187711443916 | 4.281537514502962e-06 | 0.3446167469024658 | 1010 |
| 0.3264 | 2.6154317834679226 | 4.261832137810093e-06 | 0.34354138374328613 | 1020 |
| 0.3296 | 2.8993376261822648 | 4.241906859145611e-06 | 0.3451784372329712 | 1030 |
| 0.3328 | 2.3351853591260574 | 4.221764165470661e-06 | 0.33875834941864014 | 1040 |
| 0.336 | 3.4295735539049605 | 4.201406570882898e-06 | 0.33980226516723633 | 1050 |
| 0.3392 | 2.6388634367096735 | 4.180836616302704e-06 | 0.3395829200744629 | 1060 |
| 0.3424 | 3.211009486395674 | 4.160056869156041e-06 | 0.3433471441268921 | 1070 |
| 0.3456 | 3.4377414857289317 | 4.139069923053995e-06 | 0.34047765731811525 | 1080 |
| 0.3488 | 3.131466112366247 | 4.117878397469062e-06 | 0.3420018434524536 | 1090 |
| 0.352 | 2.388207923072635 | 4.096484937408195e-06 | 0.3351470470428467 | 1100 |
| 0.3552 | 2.2910707329028117 | 4.074892213082676e-06 | 0.33539299964904784 | 1110 |
| 0.3584 | 2.156244058261874 | 4.0531029195748265e-06 | 0.33862009048461916 | 1120 |
| 0.3616 | 2.6382644444406296 | 4.03111977650163e-06 | 0.34041495323181153 | 1130 |
| 0.3648 | 2.5960896388831545 | 4.008945527675281e-06 | 0.3390871524810791 | 1140 |
| 0.368 | 3.657074741484568 | 3.986582940760717e-06 | 0.3278806209564209 | 1150 |
| 0.3712 | 2.9587401358526075 | 3.9640348069301785e-06 | 0.3368961334228516 | 1160 |
| 0.3744 | 1.965300565427372 | 3.941303940514826e-06 | 0.3339808464050293 | 1170 |
| 0.3776 | 2.90985435283837 | 3.918393178653472e-06 | 0.3376065969467163 | 1180 |
| 0.3808 | 3.27190473511409 | 3.895305380938468e-06 | 0.3342454433441162 | 1190 |
| 0.384 | 2.0468253424433165 | 3.872043429058783e-06 | 0.32965447902679446 | 1200 |
| 0.3872 | 2.5123150680001576 | 3.84861022644033e-06 | 0.3357837677001953 | 1210 |
| 0.3904 | 3.148104290988529 | 3.825008697883574e-06 | 0.34343953132629396 | 1220 |
| 0.3936 | 2.488823913942074 | 3.8012417891984776e-06 | 0.333116340637207 | 1230 |
| 0.3968 | 3.0225259799028645 | 3.777312466836819e-06 | 0.3318933486938477 | 1240 |
| 0.4 | 3.3439153363899115 | 3.7532237175219378e-06 | 0.32833037376403806 | 1250 |
| 0.4032 | 2.72884090647899 | 3.728978547875948e-06 | 0.3360243082046509 | 1260 |
| 0.4064 | 2.5999080124511966 | 3.7045799840444712e-06 | 0.33025145530700684 | 1270 |
| 0.4096 | 3.0518346526448488 | 3.6800310713189258e-06 | 0.3306798219680786 | 1280 |
| 0.4128 | 2.0509087709244507 | 3.6553348737564328e-06 | 0.33091559410095217 | 1290 |
| 0.416 | 2.908137390744499 | 3.6304944737973794e-06 | 0.33455810546875 | 1300 |
| 0.4192 | 3.0396312942670796 | 3.6055129718806836e-06 | 0.331624960899353 | 1310 |
| 0.4224 | 3.282462978283218 | 3.5803934860568134e-06 | 0.32364490032196047 | 1320 |
| 0.4256 | 2.2269456751164727 | 3.5551391515986163e-06 | 0.3319955348968506 | 1330 |
| 0.4288 | 2.8364899461485527 | 3.529753120609982e-06 | 0.3252741813659668 | 1340 |
| 0.432 | 2.89515974439621 | 3.5042385616324243e-06 | 0.3287111520767212 | 1350 |
| 0.4352 | 2.311001238312573 | 3.4785986592495934e-06 | 0.32939796447753905 | 1360 |
| 0.4384 | 2.4126049139350734 | 3.452836613689803e-06 | 0.32168779373168943 | 1370 |
| 0.4416 | 3.1765584413022254 | 3.426955640426584e-06 | 0.32864985466003416 | 1380 |
| 0.4448 | 3.154206643410634 | 3.4009589697773605e-06 | 0.3260640621185303 | 1390 |
| 0.448 | 3.4230687653412564 | 3.3748498465002475e-06 | 0.32304584980010986 | 1400 |
| 0.4512 | 2.6276396964869684 | 3.3486315293890693e-06 | 0.33318138122558594 | 1410 |
| 0.4544 | 2.754821177049362 | 3.3223072908666053e-06 | 0.32256054878234863 | 1420 |
| 0.4576 | 2.881952130772473 | 3.295880416576153e-06 | 0.33387539386749265 | 1430 |
| 0.4608 | 2.5217047707442966 | 3.269354204971427e-06 | 0.32321481704711913 | 1440 |
| 0.464 | 2.976679985492794 | 3.242731966904865e-06 | 0.32245721817016604 | 1450 |
| 0.4672 | 2.527563459090948 | 3.2160170252143913e-06 | 0.32239205837249757 | 1460 |
| 0.4704 | 1.997832889519553 | 3.1892127143086716e-06 | 0.32758924961090086 | 1470 |
| 0.4736 | 2.299101703675196 | 3.1623223797509347e-06 | 0.31891183853149413 | 1480 |
| 0.4768 | 2.9210746413068907 | 3.135349377841396e-06 | 0.32430353164672854 | 1490 |
| 0.48 | 2.6265609696149146 | 3.1082970751983497e-06 | 0.3312281608581543 | 1500 |
| 0.4832 | 2.5956160397204786 | 3.0811688483379546e-06 | 0.3238035202026367 | 1510 |
| 0.4864 | 2.231793404952503 | 3.0539680832528074e-06 | 0.32330875396728515 | 1520 |
| 0.4896 | 2.5723097920479763 | 3.026698174989316e-06 | 0.32520170211791993 | 1530 |
| 0.4928 | 2.691498291676849 | 2.999362527223952e-06 | 0.3273704290390015 | 1540 |
| 0.496 | 2.0511124933056375 | 2.9719645518384194e-06 | 0.3250606536865234 | 1550 |
| 0.4992 | 2.872290392112785 | 2.944507668493807e-06 | 0.3281686782836914 | 1560 |
| 0.5024 | 2.330246614888919 | 2.9169953042037623e-06 | 0.32374157905578616 | 1570 |
| 0.5056 | 2.0520711406500394 | 2.889430892906754e-06 | 0.3169667720794678 | 1580 |
| 0.5088 | 2.048670737699487 | 2.861817875037462e-06 | 0.3160442590713501 | 1590 |
| 0.512 | 2.8695840695234303 | 2.8341596970973683e-06 | 0.32544608116149903 | 1600 |
| 0.5152 | 1.976397223627746 | 2.80645981122458e-06 | 0.3229134798049927 | 1610 |
| 0.5184 | 2.7070609575351807 | 2.7787216747629508e-06 | 0.32655487060546873 | 1620 |
| 0.5216 | 2.6027463070090993 | 2.7509487498305615e-06 | 0.31430754661560056 | 1630 |
| 0.5248 | 2.4274539931656585 | 2.7231445028875924e-06 | 0.3237884759902954 | 1640 |
| 0.528 | 1.9308598632845329 | 2.6953124043036604e-06 | 0.32111692428588867 | 1650 |
| 0.5312 | 2.1321964485217784 | 2.667455927924667e-06 | 0.3178241729736328 | 1660 |
| 0.5344 | 3.1390388403682534 | 2.6395785506392164e-06 | 0.31754770278930666 | 1670 |
| 0.5376 | 2.137535651695072 | 2.6116837519446407e-06 | 0.3183767795562744 | 1680 |
| 0.5408 | 2.353751591087722 | 2.5837750135127192e-06 | 0.31382954120635986 | 1690 |
| 0.544 | 2.58704039056448 | 2.555855818755108e-06 | 0.3226866483688354 | 1700 |
| 0.5472 | 2.709677414439902 | 2.5279296523885636e-06 | 0.3166576623916626 | 1710 |
| 0.5504 | 2.0859245317104107 | 2.5e-06 | 0.3218212127685547 | 1720 |
| 0.5536 | 2.3347357869338436 | 2.472070347611437e-06 | 0.31246294975280764 | 1730 |
| 0.5568 | 2.5799420800617106 | 2.444144181244893e-06 | 0.31868853569030764 | 1740 |
| 0.56 | 2.8867509619529406 | 2.416224986487282e-06 | 0.31381807327270506 | 1750 |
| 0.5632 | 2.625660671305278 | 2.3883162480553605e-06 | 0.31146280765533446 | 1760 |
| 0.5664 | 2.8862495653341544 | 2.3604214493607844e-06 | 0.3111546993255615 | 1770 |
| 0.5696 | 2.267020272744141 | 2.332544072075333e-06 | 0.32178173065185545 | 1780 |
| 0.5728 | 2.073205643473978 | 2.30468759569634e-06 | 0.31751441955566406 | 1790 |
| 0.576 | 2.232045258362397 | 2.276855497112408e-06 | 0.3135702610015869 | 1800 |
| 0.5792 | 3.4632505976937744 | 2.2490512501694394e-06 | 0.3126095771789551 | 1810 |
| 0.5824 | 2.7008114205550022 | 2.2212783252370496e-06 | 0.31725611686706545 | 1820 |
| 0.5856 | 2.640110404643157 | 2.1935401887754213e-06 | 0.3210929870605469 | 1830 |
| 0.5888 | 2.9154181525967924 | 2.165840302902632e-06 | 0.31817543506622314 | 1840 |
| 0.592 | 2.3435756622683916 | 2.1381821249625383e-06 | 0.3186073303222656 | 1850 |
| 0.5952 | 2.391868801860604 | 2.1105691070932465e-06 | 0.3081700563430786 | 1860 |
| 0.5984 | 2.27033295147997 | 2.083004695796238e-06 | 0.30403599739074705 | 1870 |
| 0.6016 | 2.1095837820360157 | 2.055492331506194e-06 | 0.31353535652160647 | 1880 |
| 0.6048 | 2.284519052184323 | 2.0280354481615814e-06 | 0.31677517890930174 | 1890 |
| 0.608 | 2.237766836173548 | 2.000637472776049e-06 | 0.3152945041656494 | 1900 |
| 0.6112 | 2.7842715157490434 | 1.973301825010685e-06 | 0.30818216800689696 | 1910 |
| 0.6144 | 2.4813744091778784 | 1.9460319167471934e-06 | 0.31820502281188967 | 1920 |
| 0.6176 | 2.0821248606030887 | 1.9188311516620466e-06 | 0.31040709018707274 | 1930 |
| 0.6208 | 2.9336859566866975 | 1.891702924801651e-06 | 0.31292426586151123 | 1940 |
| 0.624 | 2.511253012965921 | 1.864650622158604e-06 | 0.32196660041809083 | 1950 |
| 0.6272 | 2.4545922236833455 | 1.8376776202490666e-06 | 0.31464810371398927 | 1960 |
| 0.6304 | 2.277913414668649 | 1.8107872856913293e-06 | 0.30748977661132815 | 1970 |
| 0.6336 | 3.6960663974743273 | 1.7839829747856096e-06 | 0.31303911209106444 | 1980 |
| 0.6368 | 2.5169048193896844 | 1.7572680330951359e-06 | 0.309541130065918 | 1990 |
| 0.64 | 2.625801312197355 | 1.7306457950285747e-06 | 0.31228773593902587 | 2000 |
| 0.6432 | 3.166705714244592 | 1.704119583423848e-06 | 0.30709683895111084 | 2010 |
| 0.6464 | 2.7529448920288755 | 1.677692709133396e-06 | 0.3121641159057617 | 2020 |
| 0.6496 | 2.4164266641009386 | 1.6513684706109311e-06 | 0.31612191200256345 | 2030 |
| 0.6528 | 2.1475852674178486 | 1.6251501534997529e-06 | 0.30926761627197263 | 2040 |
| 0.656 | 3.027937409819003 | 1.5990410302226405e-06 | 0.3059820652008057 | 2050 |
| 0.6592 | 2.3663528005893575 | 1.5730443595734162e-06 | 0.30960190296173096 | 2060 |
| 0.6624 | 2.5495655090650806 | 1.5471633863101982e-06 | 0.3146512508392334 | 2070 |
| 0.6656 | 2.563871195645732 | 1.521401340750407e-06 | 0.3116560935974121 | 2080 |
| 0.6688 | 2.4316488926893314 | 1.495761438367577e-06 | 0.31447796821594237 | 2090 |
| 0.672 | 2.446980089200077 | 1.4702468793900187e-06 | 0.3153538703918457 | 2100 |
| 0.6752 | 2.2511595317617283 | 1.444860848401384e-06 | 0.31273808479309084 | 2110 |
| 0.6784 | 2.459748219135552 | 1.4196065139431866e-06 | 0.31059865951538085 | 2120 |
| 0.6816 | 2.4570005490031805 | 1.3944870281193178e-06 | 0.31122384071350095 | 2130 |
| 0.6848 | 2.5940034157380447 | 1.3695055262026208e-06 | 0.3145638704299927 | 2140 |
| 0.688 | 2.8940635665298644 | 1.3446651262435679e-06 | 0.31133465766906737 | 2150 |
| 0.6912 | 2.2603444512196216 | 1.3199689286810746e-06 | 0.31110968589782717 | 2160 |
| 0.6944 | 2.3697248986342223 | 1.2954200159555294e-06 | 0.3046250820159912 | 2170 |
| 0.6976 | 2.9149559965372083 | 1.2710214521240527e-06 | 0.3056375503540039 | 2180 |
| 0.7008 | 2.785583016537511 | 1.246776282478063e-06 | 0.3074607849121094 | 2190 |
| 0.704 | 2.238483419316128 | 1.222687533163181e-06 | 0.30821986198425294 | 2200 |
| 0.7072 | 2.0963873111402225 | 1.1987582108015228e-06 | 0.31098227500915526 | 2210 |
| 0.7104 | 2.3511311934322725 | 1.1749913021164255e-06 | 0.3125911712646484 | 2220 |
| 0.7136 | 2.0182013166602735 | 1.1513897735596702e-06 | 0.30506420135498047 | 2230 |
| 0.7168 | 2.0904990978865654 | 1.127956570941218e-06 | 0.30170474052429197 | 2240 |
| 0.72 | 2.3591898483151525 | 1.104694619061533e-06 | 0.3140627145767212 | 2250 |
| 0.7232 | 2.3874798738589553 | 1.0816068213465295e-06 | 0.3148207187652588 | 2260 |
| 0.7264 | 2.462173136321867 | 1.0586960594851762e-06 | 0.30828402042388914 | 2270 |
| 0.7296 | 2.2877287929832946 | 1.0359651930698217e-06 | 0.30725433826446535 | 2280 |
| 0.7328 | 2.5585705908550413 | 1.0134170592392837e-06 | 0.30991530418395996 | 2290 |
| 0.736 | 2.415441399008779 | 9.910544723247204e-07 | 0.31087689399719237 | 2300 |
| 0.7392 | 2.6450690086623285 | 9.688802234983706e-07 | 0.3067446231842041 | 2310 |
| 0.7424 | 2.363123649822279 | 9.468970804251742e-07 | 0.30767192840576174 | 2320 |
| 0.7456 | 2.245412676348008 | 9.251077869173244e-07 | 0.30107917785644533 | 2330 |
| 0.7488 | 2.5736642361970503 | 9.035150625918054e-07 | 0.303986120223999 | 2340 |
| 0.752 | 2.6844109007429138 | 8.821216025309395e-07 | 0.3074802875518799 | 2350 |
| 0.7552 | 2.412670568786912 | 8.609300769460055e-07 | 0.30130510330200194 | 2360 |
| 0.7584 | 3.176069141824472 | 8.399431308439592e-07 | 0.3105806827545166 | 2370 |
| 0.7616 | 2.23339472526297 | 8.191633836972962e-07 | 0.3084972620010376 | 2380 |
| 0.7648 | 2.6912839020724175 | 7.985934291171024e-07 | 0.3067460536956787 | 2390 |
| 0.768 | 2.5426618104677976 | 7.7823583452934e-07 | 0.30809898376464845 | 2400 |
| 0.7712 | 2.55531536817282 | 7.58093140854389e-07 | 0.3071744441986084 | 2410 |
| 0.7744 | 2.285863017236424 | 7.381678621899077e-07 | 0.3093477725982666 | (cut off) |

(The diff continues beyond this excerpt; the last record above is truncated before its "step" field.)
|
| 1704 |
+
"step": 2420
|
| 1705 |
+
},
|
| 1706 |
+
{
|
| 1707 |
+
"epoch": 0.7776,
|
| 1708 |
+
"grad_norm": 2.3600405361881767,
|
| 1709 |
+
"learning_rate": 7.184624854970379e-07,
|
| 1710 |
+
"loss": 0.30798888206481934,
|
| 1711 |
+
"step": 2430
|
| 1712 |
+
},
|
| 1713 |
+
{
|
| 1714 |
+
"epoch": 0.7808,
|
| 1715 |
+
"grad_norm": 2.0247328579355726,
|
| 1716 |
+
"learning_rate": 6.989794702899932e-07,
|
| 1717 |
+
"loss": 0.3048464298248291,
|
| 1718 |
+
"step": 2440
|
| 1719 |
+
},
|
| 1720 |
+
{
|
| 1721 |
+
"epoch": 0.784,
|
| 1722 |
+
"grad_norm": 2.7079172300622334,
|
| 1723 |
+
"learning_rate": 6.797212483290777e-07,
|
| 1724 |
+
"loss": 0.3093360424041748,
|
| 1725 |
+
"step": 2450
|
| 1726 |
+
},
|
| 1727 |
+
{
|
| 1728 |
+
"epoch": 0.7872,
|
| 1729 |
+
"grad_norm": 2.8011999237207967,
|
| 1730 |
+
"learning_rate": 6.60690223317171e-07,
|
| 1731 |
+
"loss": 0.30233092308044435,
|
| 1732 |
+
"step": 2460
|
| 1733 |
+
},
|
| 1734 |
+
{
|
| 1735 |
+
"epoch": 0.7904,
|
| 1736 |
+
"grad_norm": 2.202966089641912,
|
| 1737 |
+
"learning_rate": 6.418887705997046e-07,
|
| 1738 |
+
"loss": 0.3048731327056885,
|
| 1739 |
+
"step": 2470
|
| 1740 |
+
},
|
| 1741 |
+
{
|
| 1742 |
+
"epoch": 0.7936,
|
| 1743 |
+
"grad_norm": 2.6510546903467755,
|
| 1744 |
+
"learning_rate": 6.23319236868189e-07,
|
| 1745 |
+
"loss": 0.3104764461517334,
|
| 1746 |
+
"step": 2480
|
| 1747 |
+
},
|
| 1748 |
+
{
|
| 1749 |
+
"epoch": 0.7968,
|
| 1750 |
+
"grad_norm": 2.510992490322273,
|
| 1751 |
+
"learning_rate": 6.049839398673141e-07,
|
| 1752 |
+
"loss": 0.31223044395446775,
|
| 1753 |
+
"step": 2490
|
| 1754 |
+
},
|
| 1755 |
+
{
|
| 1756 |
+
"epoch": 0.8,
|
| 1757 |
+
"grad_norm": 2.7988283248607604,
|
| 1758 |
+
"learning_rate": 5.868851681056567e-07,
|
| 1759 |
+
"loss": 0.3109541893005371,
|
| 1760 |
+
"step": 2500
|
| 1761 |
+
},
|
| 1762 |
+
{
|
| 1763 |
+
"epoch": 0.8032,
|
| 1764 |
+
"grad_norm": 2.370572243788772,
|
| 1765 |
+
"learning_rate": 5.690251805700467e-07,
|
| 1766 |
+
"loss": 0.3075347900390625,
|
| 1767 |
+
"step": 2510
|
| 1768 |
+
},
|
| 1769 |
+
{
|
| 1770 |
+
"epoch": 0.8064,
|
| 1771 |
+
"grad_norm": 2.057318428676814,
|
| 1772 |
+
"learning_rate": 5.514062064436096e-07,
|
| 1773 |
+
"loss": 0.30944228172302246,
|
| 1774 |
+
"step": 2520
|
| 1775 |
+
},
|
| 1776 |
+
{
|
| 1777 |
+
"epoch": 0.8096,
|
| 1778 |
+
"grad_norm": 2.9526395601791937,
|
| 1779 |
+
"learning_rate": 5.34030444827533e-07,
|
| 1780 |
+
"loss": 0.30773684978485105,
|
| 1781 |
+
"step": 2530
|
| 1782 |
+
},
|
| 1783 |
+
{
|
| 1784 |
+
"epoch": 0.8128,
|
| 1785 |
+
"grad_norm": 2.1808951881567165,
|
| 1786 |
+
"learning_rate": 5.169000644665895e-07,
|
| 1787 |
+
"loss": 0.30281686782836914,
|
| 1788 |
+
"step": 2540
|
| 1789 |
+
},
|
| 1790 |
+
{
|
| 1791 |
+
"epoch": 0.816,
|
| 1792 |
+
"grad_norm": 2.501184820191482,
|
| 1793 |
+
"learning_rate": 5.000172034784442e-07,
|
| 1794 |
+
"loss": 0.30731327533721925,
|
| 1795 |
+
"step": 2550
|
| 1796 |
+
},
|
| 1797 |
+
{
|
| 1798 |
+
"epoch": 0.8192,
|
| 1799 |
+
"grad_norm": 2.4433836822113304,
|
| 1800 |
+
"learning_rate": 4.833839690867853e-07,
|
| 1801 |
+
"loss": 0.30861892700195315,
|
| 1802 |
+
"step": 2560
|
| 1803 |
+
},
|
| 1804 |
+
{
|
| 1805 |
+
"epoch": 0.8224,
|
| 1806 |
+
"grad_norm": 2.482955525732734,
|
| 1807 |
+
"learning_rate": 4.6700243735831705e-07,
|
| 1808 |
+
"loss": 0.3014340400695801,
|
| 1809 |
+
"step": 2570
|
| 1810 |
+
},
|
| 1811 |
+
{
|
| 1812 |
+
"epoch": 0.8256,
|
| 1813 |
+
"grad_norm": 2.516375989369738,
|
| 1814 |
+
"learning_rate": 4.508746529436311e-07,
|
| 1815 |
+
"loss": 0.302032995223999,
|
| 1816 |
+
"step": 2580
|
| 1817 |
+
},
|
| 1818 |
+
{
|
| 1819 |
+
"epoch": 0.8288,
|
| 1820 |
+
"grad_norm": 2.2676227598264926,
|
| 1821 |
+
"learning_rate": 4.350026288220083e-07,
|
| 1822 |
+
"loss": 0.30550131797790525,
|
| 1823 |
+
"step": 2590
|
| 1824 |
+
},
|
| 1825 |
+
{
|
| 1826 |
+
"epoch": 0.832,
|
| 1827 |
+
"grad_norm": 2.3829531066293126,
|
| 1828 |
+
"learning_rate": 4.1938834605017133e-07,
|
| 1829 |
+
"loss": 0.3046237945556641,
|
| 1830 |
+
"step": 2600
|
| 1831 |
+
},
|
| 1832 |
+
{
|
| 1833 |
+
"epoch": 0.8352,
|
| 1834 |
+
"grad_norm": 2.0018887466739548,
|
| 1835 |
+
"learning_rate": 4.0403375351501515e-07,
|
| 1836 |
+
"loss": 0.3024258852005005,
|
| 1837 |
+
"step": 2610
|
| 1838 |
+
},
|
| 1839 |
+
{
|
| 1840 |
+
"epoch": 0.8384,
|
| 1841 |
+
"grad_norm": 2.5182571334882597,
|
| 1842 |
+
"learning_rate": 3.88940767690362e-07,
|
| 1843 |
+
"loss": 0.3063870906829834,
|
| 1844 |
+
"step": 2620
|
| 1845 |
+
},
|
| 1846 |
+
{
|
| 1847 |
+
"epoch": 0.8416,
|
| 1848 |
+
"grad_norm": 2.7441991027074355,
|
| 1849 |
+
"learning_rate": 3.7411127239775774e-07,
|
| 1850 |
+
"loss": 0.30306272506713866,
|
| 1851 |
+
"step": 2630
|
| 1852 |
+
},
|
| 1853 |
+
{
|
| 1854 |
+
"epoch": 0.8448,
|
| 1855 |
+
"grad_norm": 2.161963722714269,
|
| 1856 |
+
"learning_rate": 3.595471185713431e-07,
|
| 1857 |
+
"loss": 0.3009947299957275,
|
| 1858 |
+
"step": 2640
|
| 1859 |
+
},
|
| 1860 |
+
{
|
| 1861 |
+
"epoch": 0.848,
|
| 1862 |
+
"grad_norm": 2.7694143698141285,
|
| 1863 |
+
"learning_rate": 3.4525012402682826e-07,
|
| 1864 |
+
"loss": 0.30188300609588625,
|
| 1865 |
+
"step": 2650
|
| 1866 |
+
},
|
| 1867 |
+
{
|
| 1868 |
+
"epoch": 0.8512,
|
| 1869 |
+
"grad_norm": 2.6814413975784217,
|
| 1870 |
+
"learning_rate": 3.3122207323460804e-07,
|
| 1871 |
+
"loss": 0.3024703025817871,
|
| 1872 |
+
"step": 2660
|
| 1873 |
+
},
|
| 1874 |
+
{
|
| 1875 |
+
"epoch": 0.8544,
|
| 1876 |
+
"grad_norm": 2.4444711671869306,
|
| 1877 |
+
"learning_rate": 3.1746471709702963e-07,
|
| 1878 |
+
"loss": 0.3008608102798462,
|
| 1879 |
+
"step": 2670
|
| 1880 |
+
},
|
| 1881 |
+
{
|
| 1882 |
+
"epoch": 0.8576,
|
| 1883 |
+
"grad_norm": 2.6886622183433015,
|
| 1884 |
+
"learning_rate": 3.039797727298585e-07,
|
| 1885 |
+
"loss": 0.30821614265441893,
|
| 1886 |
+
"step": 2680
|
| 1887 |
+
},
|
| 1888 |
+
{
|
| 1889 |
+
"epoch": 0.8608,
|
| 1890 |
+
"grad_norm": 2.641784614909192,
|
| 1891 |
+
"learning_rate": 2.9076892324795546e-07,
|
| 1892 |
+
"loss": 0.30515303611755373,
|
| 1893 |
+
"step": 2690
|
| 1894 |
+
},
|
| 1895 |
+
{
|
| 1896 |
+
"epoch": 0.864,
|
| 1897 |
+
"grad_norm": 2.5595370943122444,
|
| 1898 |
+
"learning_rate": 2.778338175551995e-07,
|
| 1899 |
+
"loss": 0.3007267236709595,
|
| 1900 |
+
"step": 2700
|
| 1901 |
+
},
|
| 1902 |
+
{
|
| 1903 |
+
"epoch": 0.8672,
|
| 1904 |
+
"grad_norm": 2.283872628964803,
|
| 1905 |
+
"learning_rate": 2.6517607013868326e-07,
|
| 1906 |
+
"loss": 0.30617167949676516,
|
| 1907 |
+
"step": 2710
|
| 1908 |
+
},
|
| 1909 |
+
{
|
| 1910 |
+
"epoch": 0.8704,
|
| 1911 |
+
"grad_norm": 2.558413840419693,
|
| 1912 |
+
"learning_rate": 2.527972608672002e-07,
|
| 1913 |
+
"loss": 0.3038905143737793,
|
| 1914 |
+
"step": 2720
|
| 1915 |
+
},
|
| 1916 |
+
{
|
| 1917 |
+
"epoch": 0.8736,
|
| 1918 |
+
"grad_norm": 2.4952676522317567,
|
| 1919 |
+
"learning_rate": 2.40698934794053e-07,
|
| 1920 |
+
"loss": 0.3054081201553345,
|
| 1921 |
+
"step": 2730
|
| 1922 |
+
},
|
| 1923 |
+
{
|
| 1924 |
+
"epoch": 0.8768,
|
| 1925 |
+
"grad_norm": 2.247637838190116,
|
| 1926 |
+
"learning_rate": 2.2888260196421237e-07,
|
| 1927 |
+
"loss": 0.3028261661529541,
|
| 1928 |
+
"step": 2740
|
| 1929 |
+
},
|
| 1930 |
+
{
|
| 1931 |
+
"epoch": 0.88,
|
| 1932 |
+
"grad_norm": 2.5035963414447804,
|
| 1933 |
+
"learning_rate": 2.1734973722583735e-07,
|
| 1934 |
+
"loss": 0.3062435626983643,
|
| 1935 |
+
"step": 2750
|
| 1936 |
+
},
|
| 1937 |
+
{
|
| 1938 |
+
"epoch": 0.8832,
|
| 1939 |
+
"grad_norm": 1.918923632238423,
|
| 1940 |
+
"learning_rate": 2.0610178004619564e-07,
|
| 1941 |
+
"loss": 0.2972743034362793,
|
| 1942 |
+
"step": 2760
|
| 1943 |
+
},
|
| 1944 |
+
{
|
| 1945 |
+
"epoch": 0.8864,
|
| 1946 |
+
"grad_norm": 2.4603002546330845,
|
| 1947 |
+
"learning_rate": 1.9514013433199834e-07,
|
| 1948 |
+
"loss": 0.3119321346282959,
|
| 1949 |
+
"step": 2770
|
| 1950 |
+
},
|
| 1951 |
+
{
|
| 1952 |
+
"epoch": 0.8896,
|
| 1953 |
+
"grad_norm": 2.1315709346733667,
|
| 1954 |
+
"learning_rate": 1.8446616825416958e-07,
|
| 1955 |
+
"loss": 0.30900893211364744,
|
| 1956 |
+
"step": 2780
|
| 1957 |
+
},
|
| 1958 |
+
{
|
| 1959 |
+
"epoch": 0.8928,
|
| 1960 |
+
"grad_norm": 2.3753122188061218,
|
| 1961 |
+
"learning_rate": 1.7408121407708007e-07,
|
| 1962 |
+
"loss": 0.3069151401519775,
|
| 1963 |
+
"step": 2790
|
| 1964 |
+
},
|
| 1965 |
+
{
|
| 1966 |
+
"epoch": 0.896,
|
| 1967 |
+
"grad_norm": 2.207415755325001,
|
| 1968 |
+
"learning_rate": 1.6398656799226253e-07,
|
| 1969 |
+
"loss": 0.2986165523529053,
|
| 1970 |
+
"step": 2800
|
| 1971 |
+
},
|
| 1972 |
+
{
|
| 1973 |
+
"epoch": 0.8992,
|
| 1974 |
+
"grad_norm": 2.178561452169741,
|
| 1975 |
+
"learning_rate": 1.5418348995662773e-07,
|
| 1976 |
+
"loss": 0.3010268688201904,
|
| 1977 |
+
"step": 2810
|
| 1978 |
+
},
|
| 1979 |
+
{
|
| 1980 |
+
"epoch": 0.9024,
|
| 1981 |
+
"grad_norm": 2.5082064593439393,
|
| 1982 |
+
"learning_rate": 1.4467320353520275e-07,
|
| 1983 |
+
"loss": 0.2984073877334595,
|
| 1984 |
+
"step": 2820
|
| 1985 |
+
},
|
| 1986 |
+
{
|
| 1987 |
+
"epoch": 0.9056,
|
| 1988 |
+
"grad_norm": 2.366814729694057,
|
| 1989 |
+
"learning_rate": 1.3545689574841341e-07,
|
| 1990 |
+
"loss": 0.3026757001876831,
|
| 1991 |
+
"step": 2830
|
| 1992 |
+
},
|
| 1993 |
+
{
|
| 1994 |
+
"epoch": 0.9088,
|
| 1995 |
+
"grad_norm": 2.380709246306716,
|
| 1996 |
+
"learning_rate": 1.26535716923927e-07,
|
| 1997 |
+
"loss": 0.310437536239624,
|
| 1998 |
+
"step": 2840
|
| 1999 |
+
},
|
| 2000 |
+
{
|
| 2001 |
+
"epoch": 0.912,
|
| 2002 |
+
"grad_norm": 2.484246324702375,
|
| 2003 |
+
"learning_rate": 1.1791078055307493e-07,
|
| 2004 |
+
"loss": 0.30369887351989744,
|
| 2005 |
+
"step": 2850
|
| 2006 |
+
},
|
| 2007 |
+
{
|
| 2008 |
+
"epoch": 0.9152,
|
| 2009 |
+
"grad_norm": 2.6412244000001786,
|
| 2010 |
+
"learning_rate": 1.0958316315187289e-07,
|
| 2011 |
+
"loss": 0.3044759750366211,
|
| 2012 |
+
"step": 2860
|
| 2013 |
+
},
|
| 2014 |
+
{
|
| 2015 |
+
"epoch": 0.9184,
|
| 2016 |
+
"grad_norm": 2.4542916560781967,
|
| 2017 |
+
"learning_rate": 1.0155390412665528e-07,
|
| 2018 |
+
"loss": 0.30136928558349607,
|
| 2019 |
+
"step": 2870
|
| 2020 |
+
},
|
| 2021 |
+
{
|
| 2022 |
+
"epoch": 0.9216,
|
| 2023 |
+
"grad_norm": 2.631911471911446,
|
| 2024 |
+
"learning_rate": 9.38240056443443e-08,
|
| 2025 |
+
"loss": 0.30144243240356444,
|
| 2026 |
+
"step": 2880
|
| 2027 |
+
},
|
| 2028 |
+
{
|
| 2029 |
+
"epoch": 0.9248,
|
| 2030 |
+
"grad_norm": 2.2530200189747243,
|
| 2031 |
+
"learning_rate": 8.639443250736402e-08,
|
| 2032 |
+
"loss": 0.3027902603149414,
|
| 2033 |
+
"step": 2890
|
| 2034 |
+
},
|
| 2035 |
+
{
|
| 2036 |
+
"epoch": 0.928,
|
| 2037 |
+
"grad_norm": 3.1331936934174123,
|
| 2038 |
+
"learning_rate": 7.926611203321777e-08,
|
| 2039 |
+
"loss": 0.30441856384277344,
|
| 2040 |
+
"step": 2900
|
| 2041 |
+
},
|
| 2042 |
+
{
|
| 2043 |
+
"epoch": 0.9312,
|
| 2044 |
+
"grad_norm": 2.5134219010551067,
|
| 2045 |
+
"learning_rate": 7.243993393874882e-08,
|
| 2046 |
+
"loss": 0.306389307975769,
|
| 2047 |
+
"step": 2910
|
| 2048 |
+
},
|
| 2049 |
+
{
|
| 2050 |
+
"epoch": 0.9344,
|
| 2051 |
+
"grad_norm": 2.372785201514508,
|
| 2052 |
+
"learning_rate": 6.591675022908805e-08,
|
| 2053 |
+
"loss": 0.30292179584503176,
|
| 2054 |
+
"step": 2920
|
| 2055 |
+
},
|
| 2056 |
+
{
|
| 2057 |
+
"epoch": 0.9376,
|
| 2058 |
+
"grad_norm": 2.407913531878434,
|
| 2059 |
+
"learning_rate": 5.969737509131241e-08,
|
| 2060 |
+
"loss": 0.29895825386047364,
|
| 2061 |
+
"step": 2930
|
| 2062 |
+
},
|
| 2063 |
+
{
|
| 2064 |
+
"epoch": 0.9408,
|
| 2065 |
+
"grad_norm": 2.2376435379528865,
|
| 2066 |
+
"learning_rate": 5.3782584792823334e-08,
|
| 2067 |
+
"loss": 0.30271134376525877,
|
| 2068 |
+
"step": 2940
|
| 2069 |
+
},
|
| 2070 |
+
{
|
| 2071 |
+
"epoch": 0.944,
|
| 2072 |
+
"grad_norm": 2.653290725438786,
|
| 2073 |
+
"learning_rate": 4.817311758445686e-08,
|
| 2074 |
+
"loss": 0.3062829732894897,
|
| 2075 |
+
"step": 2950
|
| 2076 |
+
},
|
| 2077 |
+
{
|
| 2078 |
+
"epoch": 0.9472,
|
| 2079 |
+
"grad_norm": 2.42511171945876,
|
| 2080 |
+
"learning_rate": 4.286967360833866e-08,
|
| 2081 |
+
"loss": 0.3066932439804077,
|
| 2082 |
+
"step": 2960
|
| 2083 |
+
},
|
| 2084 |
+
{
|
| 2085 |
+
"epoch": 0.9504,
|
| 2086 |
+
"grad_norm": 2.1534299736877895,
|
| 2087 |
+
"learning_rate": 3.787291481049754e-08,
|
| 2088 |
+
"loss": 0.3068870544433594,
|
| 2089 |
+
"step": 2970
|
| 2090 |
+
},
|
| 2091 |
+
{
|
| 2092 |
+
"epoch": 0.9536,
|
| 2093 |
+
"grad_norm": 2.209956884835794,
|
| 2094 |
+
"learning_rate": 3.3183464858244364e-08,
|
| 2095 |
+
"loss": 0.31453580856323243,
|
| 2096 |
+
"step": 2980
|
| 2097 |
+
},
|
| 2098 |
+
{
|
| 2099 |
+
"epoch": 0.9568,
|
| 2100 |
+
"grad_norm": 2.5928568899987017,
|
| 2101 |
+
"learning_rate": 2.8801909062328992e-08,
|
| 2102 |
+
"loss": 0.2991969108581543,
|
| 2103 |
+
"step": 2990
|
| 2104 |
+
},
|
| 2105 |
+
{
|
| 2106 |
+
"epoch": 0.96,
|
| 2107 |
+
"grad_norm": 2.385980918167846,
|
| 2108 |
+
"learning_rate": 2.4728794303886248e-08,
|
| 2109 |
+
"loss": 0.2963397026062012,
|
| 2110 |
+
"step": 3000
|
| 2111 |
+
},
|
| 2112 |
+
{
|
| 2113 |
+
"epoch": 0.9632,
|
| 2114 |
+
"grad_norm": 2.374100986684654,
|
| 2115 |
+
"learning_rate": 2.0964628966175794e-08,
|
| 2116 |
+
"loss": 0.30301966667175295,
|
| 2117 |
+
"step": 3010
|
| 2118 |
+
},
|
| 2119 |
+
{
|
| 2120 |
+
"epoch": 0.9664,
|
| 2121 |
+
"grad_norm": 2.094256605734986,
|
| 2122 |
+
"learning_rate": 1.750988287113009e-08,
|
| 2123 |
+
"loss": 0.2994666576385498,
|
| 2124 |
+
"step": 3020
|
| 2125 |
+
},
|
| 2126 |
+
{
|
| 2127 |
+
"epoch": 0.9696,
|
| 2128 |
+
"grad_norm": 1.916185239441286,
|
| 2129 |
+
"learning_rate": 1.4364987220713278e-08,
|
| 2130 |
+
"loss": 0.3080729007720947,
|
| 2131 |
+
"step": 3030
|
| 2132 |
+
},
|
| 2133 |
+
{
|
| 2134 |
+
"epoch": 0.9728,
|
| 2135 |
+
"grad_norm": 2.3446521041543207,
|
| 2136 |
+
"learning_rate": 1.1530334543099763e-08,
|
| 2137 |
+
"loss": 0.3026130199432373,
|
| 2138 |
+
"step": 3040
|
| 2139 |
+
},
|
| 2140 |
+
{
|
| 2141 |
+
"epoch": 0.976,
|
| 2142 |
+
"grad_norm": 2.5854178252734323,
|
| 2143 |
+
"learning_rate": 9.006278643683697e-09,
|
| 2144 |
+
"loss": 0.309655499458313,
|
| 2145 |
+
"step": 3050
|
| 2146 |
+
},
|
| 2147 |
+
{
|
| 2148 |
+
"epoch": 0.9792,
|
| 2149 |
+
"grad_norm": 1.9908162772434517,
|
| 2150 |
+
"learning_rate": 6.793134560916514e-09,
|
| 2151 |
+
"loss": 0.31186389923095703,
|
| 2152 |
+
"step": 3060
|
| 2153 |
+
},
|
| 2154 |
+
{
|
| 2155 |
+
"epoch": 0.9824,
|
| 2156 |
+
"grad_norm": 2.1977962094508534,
|
| 2157 |
+
"learning_rate": 4.891178526986451e-09,
|
| 2158 |
+
"loss": 0.30645883083343506,
|
| 2159 |
+
"step": 3070
|
| 2160 |
+
},
|
| 2161 |
+
{
|
| 2162 |
+
"epoch": 0.9856,
|
| 2163 |
+
"grad_norm": 2.2397406638818147,
|
| 2164 |
+
"learning_rate": 3.3006479333413943e-09,
|
| 2165 |
+
"loss": 0.3090504169464111,
|
| 2166 |
+
"step": 3080
|
| 2167 |
+
},
|
| 2168 |
+
{
|
| 2169 |
+
"epoch": 0.9888,
|
| 2170 |
+
"grad_norm": 2.0435901319475036,
|
| 2171 |
+
"learning_rate": 2.021741301058422e-09,
|
| 2172 |
+
"loss": 0.3049570322036743,
|
| 2173 |
+
"step": 3090
|
| 2174 |
+
},
|
| 2175 |
+
{
|
| 2176 |
+
"epoch": 0.992,
|
| 2177 |
+
"grad_norm": 2.371036869409615,
|
| 2178 |
+
"learning_rate": 1.0546182560652872e-09,
|
| 2179 |
+
"loss": 0.3073274612426758,
|
| 2180 |
+
"step": 3100
|
| 2181 |
+
},
|
| 2182 |
+
{
|
| 2183 |
+
"epoch": 0.9952,
|
| 2184 |
+
"grad_norm": 2.2551729202130457,
|
| 2185 |
+
"learning_rate": 3.9939950921774607e-10,
|
| 2186 |
+
"loss": 0.30047030448913575,
|
| 2187 |
+
"step": 3110
|
| 2188 |
+
},
|
| 2189 |
+
{
|
| 2190 |
+
"epoch": 0.9984,
|
| 2191 |
+
"grad_norm": 2.2067081414460827,
|
| 2192 |
+
"learning_rate": 5.616684123160854e-11,
|
| 2193 |
+
"loss": 0.3023503065109253,
|
| 2194 |
+
"step": 3120
|
| 2195 |
+
},
|
| 2196 |
+
{
|
| 2197 |
+
"epoch": 1.0,
|
| 2198 |
+
"step": 3125,
|
| 2199 |
+
"total_flos": 1.0913057758773248e+16,
|
| 2200 |
+
"train_loss": 0.7292402684783935,
|
| 2201 |
+
"train_runtime": 30167.0559,
|
| 2202 |
+
"train_samples_per_second": 6.63,
|
| 2203 |
+
"train_steps_per_second": 0.104
|
| 2204 |
+
}
|
| 2205 |
+
],
|
| 2206 |
+
"logging_steps": 10,
|
| 2207 |
+
"max_steps": 3125,
|
| 2208 |
+
"num_input_tokens_seen": 0,
|
| 2209 |
+
"num_train_epochs": 1,
|
| 2210 |
+
"save_steps": 500,
|
| 2211 |
+
"stateful_callbacks": {
|
| 2212 |
+
"TrainerControl": {
|
| 2213 |
+
"args": {
|
| 2214 |
+
"should_epoch_stop": false,
|
| 2215 |
+
"should_evaluate": false,
|
| 2216 |
+
"should_log": false,
|
| 2217 |
+
"should_save": true,
|
| 2218 |
+
"should_training_stop": true
|
| 2219 |
+
},
|
| 2220 |
+
"attributes": {}
|
| 2221 |
+
}
|
| 2222 |
+
},
|
| 2223 |
+
"total_flos": 1.0913057758773248e+16,
|
| 2224 |
+
"train_batch_size": 4,
|
| 2225 |
+
"trial_name": null,
|
| 2226 |
+
"trial_params": null
|
| 2227 |
+
}
|
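The learning_rate column in the tail above decays smoothly to near zero by max_steps 3125, and the InternVL3.5 log further down in this diff shows the matching warmup reaching about 5e-06 around step 320. The checkpoints do not record which scheduler produced this, so the following is only a sketch of one schedule with that shape (linear warmup plus cosine decay); the warmup length of 320 steps is read off the logs, not taken from a config.

import math

def lr_at(step, max_lr=5e-06, warmup=320, max_steps=3125):
    # Linear warmup, then cosine decay to zero (illustrative, not the actual training config).
    if step < warmup:
        return max_lr * step / warmup
    progress = (step - warmup) / (max_steps - warmup)
    return 0.5 * max_lr * (1.0 + math.cos(math.pi * progress))

print(lr_at(2500))  # ~5.88e-07; the log above shows 5.868851681056567e-07 at step 2500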
checkpoints/Gemma-4-E4B-it-SFT/training_loss.png
ADDED
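training_loss.png is a binary image, so the diff shows no content for it, but the same curve can be regenerated from the trainer_state.json in the same directory. A minimal sketch (assumes matplotlib is installed):

import json
import matplotlib.pyplot as plt

with open("checkpoints/Gemma-4-E4B-it-SFT/trainer_state.json") as f:
    state = json.load(f)

# Keep only the per-step log entries; the final record is a run summary without "loss".
logs = [e for e in state["log_history"] if "loss" in e]
plt.plot([e["step"] for e in logs], [e["loss"] for e in logs])
plt.xlabel("step")
plt.ylabel("training loss")
plt.savefig("training_loss.png")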
checkpoints/InternVL3.5-8B-SFT/all_results.json
ADDED
@@ -0,0 +1,8 @@
+{
+  "epoch": 1.0,
+  "total_flos": 1955525886476288.0,
+  "train_loss": 0.1948647116279602,
+  "train_runtime": 28413.61,
+  "train_samples_per_second": 7.039,
+  "train_steps_per_second": 0.11
+}
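The run statistics above are internally consistent, and dividing the two rates gives the implied effective batch size (a derived number, not a field stored in this file). A quick arithmetic check:

runtime, samples_ps, steps_ps = 28413.61, 7.039, 0.11
print(round(runtime * samples_ps))   # ~200003 samples seen in 1 epoch
print(round(samples_ps / steps_ps))  # 64 samples per optimizer step; 3125 steps * 64 = 200000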
checkpoints/InternVL3.5-8B-SFT/chat_template.jinja
ADDED
@@ -0,0 +1,6 @@
+{% for message in messages %}{{'<|im_start|>' + message['role'] + '
+'}}{% if message['content'] is string %}{{ message['content'] }}{% else %}{% for content in message['content'] %}{% if content['type'] == 'image' %}{{ '<image>
+' }}{% elif content['type'] == 'video' %}{{ '<video>
+' }}{% elif content['type'] == 'text' %}{{ content['text'] }}{% endif %}{% endfor %}{% endif %}{{'<|im_end|>
+'}}{% endfor %}{% if add_generation_prompt %}{{'<|im_start|>assistant
+' }}{% endif %}
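This template wraps each turn as <|im_start|>{role} ... <|im_end|> and expands image/video content parts into <image>/<video> placeholders. A sketch of rendering it through the saved processor; the local path matches this PR, while the waypoint prompt is only an illustration:

from transformers import AutoProcessor

processor = AutoProcessor.from_pretrained("checkpoints/InternVL3.5-8B-SFT")
messages = [{"role": "user",
             "content": [{"type": "image"},
                         {"type": "text", "text": "Predict the next five waypoints."}]}]
text = processor.apply_chat_template(messages, add_generation_prompt=True, tokenize=False)
# Roughly: "<|im_start|>user\n<image>\nPredict the next five waypoints.<|im_end|>\n<|im_start|>assistant\n"
print(text)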
checkpoints/InternVL3.5-8B-SFT/config.json
ADDED
@@ -0,0 +1,121 @@
+{
+  "architectures": ["InternVLForConditionalGeneration"],
+  "downsample_ratio": 0.5,
+  "dtype": "bfloat16",
+  "eos_token_id": 151645,
+  "hidden_size": 4096,
+  "image_seq_length": 256,
+  "image_token_id": 151671,
+  "model_type": "internvl",
+  "pad_token_id": 151643,
+  "projector_hidden_act": "gelu",
+  "text_config": {
+    "attention_bias": false,
+    "attention_dropout": 0.0,
+    "bos_token_id": 151643,
+    "debug": false,
+    "dtype": "bfloat16",
+    "eos_token_id": 151645,
+    "ep_size": 1,
+    "head_dim": 128,
+    "hidden_act": "silu",
+    "hidden_size": 4096,
+    "initializer_range": 0.02,
+    "intermediate_size": 12288,
+    "layer_types": ["full_attention", "full_attention", "full_attention", "full_attention", "full_attention", "full_attention", "full_attention", "full_attention", "full_attention", "full_attention", "full_attention", "full_attention", "full_attention", "full_attention", "full_attention", "full_attention", "full_attention", "full_attention", "full_attention", "full_attention", "full_attention", "full_attention", "full_attention", "full_attention", "full_attention", "full_attention", "full_attention", "full_attention", "full_attention", "full_attention", "full_attention", "full_attention", "full_attention", "full_attention", "full_attention", "full_attention"],
+    "max_position_embeddings": 40960,
+    "max_window_layers": 36,
+    "micro_forward": false,
+    "model_type": "qwen3",
+    "num_attention_heads": 32,
+    "num_hidden_layers": 36,
+    "num_key_value_heads": 8,
+    "pad_token_id": null,
+    "rms_norm_eps": 1e-06,
+    "rope_parameters": {"rope_theta": 1000000, "rope_type": "default"},
+    "skip_checkpoint": false,
+    "sliding_window": null,
+    "tie_word_embeddings": false,
+    "use_cache": false,
+    "use_deepep": false,
+    "use_sliding_window": false,
+    "vocab_size": 151936
+  },
+  "tie_word_embeddings": false,
+  "transformers_version": "5.5.3",
+  "use_cache": false,
+  "vision_config": {
+    "attention_bias": true,
+    "attention_dropout": 0.0,
+    "dtype": "bfloat16",
+    "hidden_act": "gelu",
+    "hidden_dropout_prob": 0.0,
+    "hidden_size": 1024,
+    "image_size": [448, 448],
+    "initializer_range": 0.02,
+    "intermediate_size": 4096,
+    "layer_norm_eps": 1e-06,
+    "layer_scale_init_value": 0.1,
+    "model_type": "internvl_vision",
+    "norm_type": "layer_norm",
+    "num_attention_heads": 16,
+    "num_channels": 3,
+    "num_hidden_layers": 24,
+    "patch_size": [14, 14],
+    "projection_dropout": 0.0,
+    "use_absolute_position_embeddings": true,
+    "use_mask_token": false,
+    "use_mean_pooling": true,
+    "use_qk_norm": false
+  },
+  "vision_feature_layer": -1,
+  "vision_feature_select_strategy": "default"
+}
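This config pairs a 24-layer InternVL vision tower (448x448 input, 14x14 patches) with a 36-layer Qwen3 text backbone. With downsample_ratio 0.5, each 448x448 tile yields (448/14 * 0.5)^2 = 16^2 = 256 image tokens, matching image_seq_length. A sketch of loading the checkpoint; the auto classes below are transformers' standard mapping for the "internvl" model type and may differ across library versions:

import torch
from transformers import AutoConfig, AutoModelForImageTextToText

config = AutoConfig.from_pretrained("checkpoints/InternVL3.5-8B-SFT")
# Sanity-check the image-token arithmetic described above: 16**2 == 256.
assert (config.vision_config.image_size[0] // config.vision_config.patch_size[0]
        * config.downsample_ratio) ** 2 == config.image_seq_length

model = AutoModelForImageTextToText.from_pretrained(
    "checkpoints/InternVL3.5-8B-SFT", torch_dtype=torch.bfloat16)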
checkpoints/InternVL3.5-8B-SFT/eval_results_job_internvl35_8b_internvl35_8b_20260430_002347.json
ADDED
@@ -0,0 +1,55 @@
+{
+  "mae_dx": 0.1673913793103448,
+  "rmse_dx": 0.558917781258475,
+  "mae_dy": 0.14736034482758623,
+  "rmse_dy": 0.43480112378118263,
+  "mae_dz": 0.014543103448275864,
+  "rmse_dz": 0.08856353651475307,
+  "mae_dpitch": 0.3550155172413793,
+  "rmse_dpitch": 0.7902604437560551,
+  "mae_dyaw": 1.1672620689655173,
+  "rmse_dyaw": 2.7476475518096417,
+  "mae_droll": 0.0,
+  "rmse_droll": 0.0,
+  "mae_overall": 0.3085954022988506,
+  "mae_position": 0.10976494252873564,
+  "mae_rotation": 0.5074258620689654,
+  "rmse_overall": 1.2030075672648746,
+  "wp1_euc_mae": 0.07507887870117307,
+  "wp1_euc_median": 0.020000000000000018,
+  "wp2_euc_mae": 0.15300439055300805,
+  "wp2_euc_median": 0.04472135954999579,
+  "wp3_euc_mae": 0.24257533874781437,
+  "wp3_euc_median": 0.0806225774829854,
+  "wp4_euc_mae": 0.35212693283711727,
+  "wp4_euc_median": 0.12369316876852973,
+  "wp5_euc_mae": 0.4665492393220971,
+  "wp5_euc_median": 0.17131835484052965,
+  "euclidean_mae": 0.25786695603224197,
+  "ADE": 0.25786695603224197,
+  "FDE": 0.4665492393220971,
+  "ADE_median": 0.09423994273900672,
+  "FDE_median": 0.17131835484052965,
+  "SR@0.5m": 0.8844827586206897,
+  "SR@1.0m": 0.9520689655172414,
+  "SR@2.0m": 0.9801724137931035,
+  "SR@5.0m": 0.9956896551724138,
+  "TrajSR@1.0m": 0.8931034482758621,
+  "TrajSR@2.0m": 0.9586206896551724,
+  "TrajSR@5.0m": 0.9887931034482759,
+  "RotAcc@1.0deg": 0.6555172413793103,
+  "RotAcc@5.0deg": 0.9496551724137932,
+  "RotAcc@10.0deg": 0.9872413793103448,
+  "wp1_rot_mae": 0.6533116266968418,
+  "wp2_rot_mae": 0.9349310214465391,
+  "wp3_rot_mae": 1.2746919556832232,
+  "wp4_rot_mae": 1.6432791843561125,
+  "wp5_rot_mae": 2.0312724349773714,
+  "rotation_euc_mae": 1.3074972446320177,
+  "parse_failure_rate": 0.0,
+  "parse_success_rate": 1.0,
+  "valid_samples": 1160,
+  "total_samples": 1160,
+  "parse_failures": 0,
+  "inference_engine": "transformers"
+}
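In these results ADE equals euclidean_mae (the per-waypoint Euclidean error averaged over wp1..wp5) and FDE equals wp5_euc_mae (the final waypoint only). A minimal sketch of that convention, assuming predictions and ground truth arrive as (N, W, D) arrays; the release's own eval script may differ in details such as masking invalid samples:

import numpy as np

def ade_fde(pred: np.ndarray, gt: np.ndarray) -> tuple[float, float]:
    """pred, gt: (N, W, D) arrays of W waypoints per sample."""
    dist = np.linalg.norm(pred - gt, axis=-1)   # (N, W) per-waypoint Euclidean error
    return dist.mean(), dist[:, -1].mean()      # ADE over all waypoints, FDE on the last

# Cross-check against the file above:
# (0.07508 + 0.15300 + 0.24258 + 0.35213 + 0.46655) / 5 ~= 0.25787 = ADE, and FDE = wp5_euc_mae.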
checkpoints/InternVL3.5-8B-SFT/generation_config.json
ADDED
@@ -0,0 +1,8 @@
+{
+  "_from_model_config": true,
+  "eos_token_id": [151645],
+  "pad_token_id": 151643,
+  "transformers_version": "5.5.3"
+}
checkpoints/InternVL3.5-8B-SFT/model.safetensors
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7ca84729e7bbaee8063f7e92a2435d5d69d0d38983a451a5085d11c886ab5e34
+size 17056747968
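The weights are stored via Git LFS, so the diff only shows the pointer file. After downloading the real ~17 GB safetensors file, the oid can be re-checked with the standard library; a minimal sketch:

import hashlib

def sha256_of(path: str, chunk: int = 1 << 20) -> str:
    h = hashlib.sha256()
    with open(path, "rb") as f:
        while block := f.read(chunk):
            h.update(block)
    return h.hexdigest()

# Prints the oid from the pointer above if the download is intact.
print(sha256_of("checkpoints/InternVL3.5-8B-SFT/model.safetensors"))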
checkpoints/InternVL3.5-8B-SFT/processor_config.json
ADDED
@@ -0,0 +1,79 @@
+{
+  "image_processor": {
+    "crop_to_patches": false,
+    "data_format": "channels_first",
+    "default_to_square": true,
+    "do_convert_rgb": true,
+    "do_normalize": true,
+    "do_rescale": true,
+    "do_resize": true,
+    "image_mean": [0.485, 0.456, 0.406],
+    "image_processor_type": "GotOcr2ImageProcessor",
+    "image_std": [0.229, 0.224, 0.225],
+    "max_patches": 12,
+    "min_patches": 1,
+    "resample": 3,
+    "rescale_factor": 0.00392156862745098,
+    "size": {"height": 448, "width": 448}
+  },
+  "image_seq_length": 256,
+  "processor_class": "InternVLProcessor",
+  "video_processor": {
+    "data_format": "channels_first",
+    "default_to_square": true,
+    "do_convert_rgb": true,
+    "do_normalize": true,
+    "do_rescale": true,
+    "do_resize": true,
+    "do_sample_frames": false,
+    "image_mean": [0.48145466, 0.4578275, 0.40821073],
+    "image_std": [0.26862954, 0.26130258, 0.27577711],
+    "initial_shift": true,
+    "model_valid_processing_keys": ["do_convert_rgb", "do_resize", "size", "size_divisor", "default_to_square", "resample", "do_rescale", "rescale_factor", "do_normalize", "image_mean", "image_std", "do_pad", "do_center_crop", "crop_size", "data_format", "input_data_format", "device"],
+    "resample": 3,
+    "rescale_factor": 0.00392156862745098,
+    "return_metadata": false,
+    "size": {"height": 384, "width": 384},
+    "video_processor_type": "InternVLVideoProcessor"
+  }
+}
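The image branch uses the standard ImageNet statistics: pixels are first rescaled by 1/255 (rescale_factor 0.00392156862745098) and then standardized per channel with image_mean/image_std. The equivalent arithmetic as a sketch (channels-last for readability, whereas the processor itself emits channels_first):

import numpy as np

mean = np.array([0.485, 0.456, 0.406])
std = np.array([0.229, 0.224, 0.225])

def normalize(img_uint8: np.ndarray) -> np.ndarray:
    """img_uint8: (H, W, 3) in [0, 255] -> per-channel standardized floats."""
    x = img_uint8.astype(np.float32) * 0.00392156862745098  # do_rescale (1/255)
    return (x - mean) / std                                  # do_normalize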
checkpoints/InternVL3.5-8B-SFT/tokenizer.json
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6581c44164d273d4222df982905a7e0450dcf3a4a7ebe98f9ec53e4de05beffe
+size 11424300
checkpoints/InternVL3.5-8B-SFT/tokenizer_config.json
ADDED
@@ -0,0 +1,29 @@
+{
+  "add_prefix_space": false,
+  "backend": "tokenizers",
+  "bos_token": null,
+  "clean_up_tokenization_spaces": false,
+  "context_image_token": "<IMG_CONTEXT>",
+  "end_image_token": "</img>",
+  "eos_token": "<|im_end|>",
+  "errors": "replace",
+  "extra_special_tokens": ["<|im_end|>"],
+  "is_local": true,
+  "model_max_length": 14588,
+  "model_specific_special_tokens": {
+    "context_image_token": "<IMG_CONTEXT>",
+    "end_image_token": "</img>",
+    "start_image_token": "<img>",
+    "video_token": "<|video_pad|>"
+  },
+  "pad_token": "<|endoftext|>",
+  "padding_side": "right",
+  "processor_class": "InternVLProcessor",
+  "split_special_tokens": false,
+  "start_image_token": "<img>",
+  "tokenizer_class": "Qwen2Tokenizer",
+  "unk_token": null,
+  "video_token": "<|video_pad|>"
+}
checkpoints/InternVL3.5-8B-SFT/train_results.json
ADDED
@@ -0,0 +1,8 @@
+{
+  "epoch": 1.0,
+  "total_flos": 1955525886476288.0,
+  "train_loss": 0.1948647116279602,
+  "train_runtime": 28413.61,
+  "train_samples_per_second": 7.039,
+  "train_steps_per_second": 0.11
+}
checkpoints/InternVL3.5-8B-SFT/trainer_state.json
ADDED
@@ -0,0 +1,2227 @@
+{
+  "best_global_step": null,
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 1.0,
+  "eval_steps": 500,
+  "global_step": 3125,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+  {"epoch": 0.0032, "grad_norm": 7.051180674009678, "learning_rate": 1.437699680511182e-07, "loss": 0.45998425483703614, "step": 10},
+  {"epoch": 0.0064, "grad_norm": 6.386709802443142, "learning_rate": 3.0351437699680514e-07, "loss": 0.44952831268310545, "step": 20},
+  {"epoch": 0.0096, "grad_norm": 5.3631010908380015, "learning_rate": 4.6325878594249205e-07, "loss": 0.3993690013885498, "step": 30},
+  {"epoch": 0.0128, "grad_norm": 0.852155255839625, "learning_rate": 6.230031948881789e-07, "loss": 0.3118258237838745, "step": 40},
+  {"epoch": 0.016, "grad_norm": 0.4705571475990448, "learning_rate": 7.82747603833866e-07, "loss": 0.2786674976348877, "step": 50},
+  {"epoch": 0.0192, "grad_norm": 0.39720799855122535, "learning_rate": 9.424920127795528e-07, "loss": 0.2685645580291748, "step": 60},
+  {"epoch": 0.0224, "grad_norm": 0.34144681090493506, "learning_rate": 1.1022364217252397e-06, "loss": 0.27388153076171873, "step": 70},
+  {"epoch": 0.0256, "grad_norm": 0.29670665469044527, "learning_rate": 1.2619808306709266e-06, "loss": 0.2562382221221924, "step": 80},
+  {"epoch": 0.0288, "grad_norm": 0.2721949763566226, "learning_rate": 1.4217252396166134e-06, "loss": 0.2521932125091553, "step": 90},
+  {"epoch": 0.032, "grad_norm": 0.30509418891876505, "learning_rate": 1.5814696485623005e-06, "loss": 0.2553669214248657, "step": 100},
+  {"epoch": 0.0352, "grad_norm": 0.2710599378904947, "learning_rate": 1.7412140575079875e-06, "loss": 0.2548961162567139, "step": 110},
+  {"epoch": 0.0384, "grad_norm": 0.3180117403374185, "learning_rate": 1.9009584664536742e-06, "loss": 0.2442842960357666, "step": 120},
+  {"epoch": 0.0416, "grad_norm": 0.2695352733592907, "learning_rate": 2.060702875399361e-06, "loss": 0.24766459465026855, "step": 130},
+  {"epoch": 0.0448, "grad_norm": 0.3064535363854503, "learning_rate": 2.220447284345048e-06, "loss": 0.23845260143280028, "step": 140},
+  {"epoch": 0.048, "grad_norm": 0.29068646435586043, "learning_rate": 2.380191693290735e-06, "loss": 0.23559024333953857, "step": 150},
+  {"epoch": 0.0512, "grad_norm": 0.3186203915842237, "learning_rate": 2.539936102236422e-06, "loss": 0.23029537200927735, "step": 160},
+  {"epoch": 0.0544, "grad_norm": 0.32754075011707046, "learning_rate": 2.699680511182109e-06, "loss": 0.2385089635848999, "step": 170},
+  {"epoch": 0.0576, "grad_norm": 0.38484368515577855, "learning_rate": 2.8594249201277955e-06, "loss": 0.23111426830291748, "step": 180},
+  {"epoch": 0.0608, "grad_norm": 0.299502856060473, "learning_rate": 3.0191693290734825e-06, "loss": 0.23530282974243164, "step": 190},
+  {"epoch": 0.064, "grad_norm": 0.3078123289936782, "learning_rate": 3.17891373801917e-06, "loss": 0.23611860275268554, "step": 200},
+  {"epoch": 0.0672, "grad_norm": 0.30717572422426626, "learning_rate": 3.3386581469648564e-06, "loss": 0.23241891860961914, "step": 210},
+  {"epoch": 0.0704, "grad_norm": 0.30949630760689323, "learning_rate": 3.4984025559105434e-06, "loss": 0.2257370948791504, "step": 220},
+  {"epoch": 0.0736, "grad_norm": 0.2734080768093611, "learning_rate": 3.6581469648562303e-06, "loss": 0.22820501327514647, "step": 230},
+  {"epoch": 0.0768, "grad_norm": 0.31986420438753294, "learning_rate": 3.817891373801918e-06, "loss": 0.22324295043945314, "step": 240},
+  {"epoch": 0.08, "grad_norm": 0.3271935835910018, "learning_rate": 3.977635782747604e-06, "loss": 0.22092509269714355, "step": 250},
+  {"epoch": 0.0832, "grad_norm": 0.28164810489138675, "learning_rate": 4.137380191693291e-06, "loss": 0.22088565826416015, "step": 260},
+  {"epoch": 0.0864, "grad_norm": 0.2806581165168549, "learning_rate": 4.297124600638978e-06, "loss": 0.2235860824584961, "step": 270},
+  {"epoch": 0.0896, "grad_norm": 0.2818314341404028, "learning_rate": 4.456869009584665e-06, "loss": 0.21951718330383302, "step": 280},
+  {"epoch": 0.0928, "grad_norm": 0.2755068214230404, "learning_rate": 4.616613418530352e-06, "loss": 0.22480430603027343, "step": 290},
+  {"epoch": 0.096, "grad_norm": 0.2991295097090295, "learning_rate": 4.776357827476039e-06, "loss": 0.22600164413452148, "step": 300},
+  {"epoch": 0.0992, "grad_norm": 0.3239664056294863, "learning_rate": 4.936102236421725e-06, "loss": 0.21372499465942382, "step": 310},
+  {"epoch": 0.1024, "grad_norm": 0.2881723034602484, "learning_rate": 4.999943833158769e-06, "loss": 0.21513206958770753, "step": 320},
+  {"epoch": 0.1056, "grad_norm": 0.27877645475403023, "learning_rate": 4.999600600490783e-06, "loss": 0.22072982788085938, "step": 330},
+  {"epoch": 0.1088, "grad_norm": 0.28224550070191395, "learning_rate": 4.9989453817439345e-06, "loss": 0.2146312713623047, "step": 340},
+  {"epoch": 0.112, "grad_norm": 0.26853026150431764, "learning_rate": 4.997978258698942e-06, "loss": 0.21449072360992433, "step": 350},
+  {"epoch": 0.1152, "grad_norm": 0.2706003676564934, "learning_rate": 4.996699352066659e-06, "loss": 0.2151791572570801, "step": 360},
+  {"epoch": 0.1184, "grad_norm": 0.28539700359373177, "learning_rate": 4.995108821473014e-06, "loss": 0.21470160484313966, "step": 370},
+  {"epoch": 0.1216, "grad_norm": 0.29207494833659137, "learning_rate": 4.993206865439084e-06, "loss": 0.21086468696594238, "step": 380},
+  {"epoch": 0.1248, "grad_norm": 0.263064572322246, "learning_rate": 4.990993721356317e-06, "loss": 0.20984139442443847, "step": 390},
+  {"epoch": 0.128, "grad_norm": 0.2865097413347111, "learning_rate": 4.988469665456901e-06, "loss": 0.21040558815002441, "step": 400},
+  {"epoch": 0.1312, "grad_norm": 0.2738592744136949, "learning_rate": 4.985635012779288e-06, "loss": 0.21828360557556153, "step": 410},
+  {"epoch": 0.1344, "grad_norm": 0.24970618963972283, "learning_rate": 4.98249011712887e-06, "loss": 0.2106489658355713, "step": 420},
+  {"epoch": 0.1376, "grad_norm": 0.2629431248486553, "learning_rate": 4.979035371033824e-06, "loss": 0.20979018211364747, "step": 430},
+  {"epoch": 0.1408, "grad_norm": 0.2725505982701801, "learning_rate": 4.975271205696115e-06, "loss": 0.20948367118835448, "step": 440},
+  {"epoch": 0.144, "grad_norm": 0.2704053444924022, "learning_rate": 4.971198090937671e-06, "loss": 0.2033768653869629, "step": 450},
+  {"epoch": 0.1472, "grad_norm": 0.31765035973786815, "learning_rate": 4.966816535141756e-06, "loss": 0.20044360160827637, "step": 460},
+  {"epoch": 0.1504, "grad_norm": 0.2502347867419884, "learning_rate": 4.9621270851895035e-06, "loss": 0.2100567102432251, "step": 470},
+  {"epoch": 0.1536, "grad_norm": 0.2934932151321077, "learning_rate": 4.957130326391662e-06, "loss": 0.21090621948242189, "step": 480},
+  {"epoch": 0.1568, "grad_norm": 0.26660410583968774, "learning_rate": 4.951826882415544e-06, "loss": 0.20775444507598878, "step": 490},
+  {"epoch": 0.16, "grad_norm": 0.28519626596006936, "learning_rate": 4.946217415207177e-06, "loss": 0.20256528854370118, "step": 500},
+  {"epoch": 0.1632, "grad_norm": 0.2798675045050625, "learning_rate": 4.940302624908689e-06, "loss": 0.20623595714569093, "step": 510},
+  {"epoch": 0.1664, "grad_norm": 0.28222884808809434, "learning_rate": 4.934083249770912e-06, "loss": 0.20097856521606444, "step": 520},
+  {"epoch": 0.1696, "grad_norm": 0.2788085638053828, "learning_rate": 4.927560066061251e-06, "loss": 0.20387496948242187, "step": 530},
+  {"epoch": 0.1728, "grad_norm": 0.27554368272722524, "learning_rate": 4.920733887966783e-06, "loss": 0.21524934768676757, "step": 540},
+  {"epoch": 0.176, "grad_norm": 0.26559833530971816, "learning_rate": 4.913605567492636e-06, "loss": 0.20402135848999023, "step": 550},
+  {"epoch": 0.1792, "grad_norm": 0.26554772115650926, "learning_rate": 4.906175994355656e-06, "loss": 0.20598478317260743, "step": 560},
+  {"epoch": 0.1824, "grad_norm": 0.3223429392292309,
+  "learning_rate": 4.898446095873345e-06,
|
| 408 |
+
"loss": 0.20747475624084472,
|
| 409 |
+
"step": 570
|
| 410 |
+
},
|
| 411 |
+
{
|
| 412 |
+
"epoch": 0.1856,
|
| 413 |
+
"grad_norm": 0.24355730693567182,
|
| 414 |
+
"learning_rate": 4.890416836848128e-06,
|
| 415 |
+
"loss": 0.20512137413024903,
|
| 416 |
+
"step": 580
|
| 417 |
+
},
|
| 418 |
+
{
|
| 419 |
+
"epoch": 0.1888,
|
| 420 |
+
"grad_norm": 0.3001767473938059,
|
| 421 |
+
"learning_rate": 4.882089219446925e-06,
|
| 422 |
+
"loss": 0.19992779493331908,
|
| 423 |
+
"step": 590
|
| 424 |
+
},
|
| 425 |
+
{
|
| 426 |
+
"epoch": 0.192,
|
| 427 |
+
"grad_norm": 0.2835389086432711,
|
| 428 |
+
"learning_rate": 4.873464283076074e-06,
|
| 429 |
+
"loss": 0.20495295524597168,
|
| 430 |
+
"step": 600
|
| 431 |
+
},
|
| 432 |
+
{
|
| 433 |
+
"epoch": 0.1952,
|
| 434 |
+
"grad_norm": 0.26019712508927473,
|
| 435 |
+
"learning_rate": 4.864543104251587e-06,
|
| 436 |
+
"loss": 0.2035728931427002,
|
| 437 |
+
"step": 610
|
| 438 |
+
},
|
| 439 |
+
{
|
| 440 |
+
"epoch": 0.1984,
|
| 441 |
+
"grad_norm": 0.2657949563176517,
|
| 442 |
+
"learning_rate": 4.855326796464798e-06,
|
| 443 |
+
"loss": 0.20619282722473145,
|
| 444 |
+
"step": 620
|
| 445 |
+
},
|
| 446 |
+
{
|
| 447 |
+
"epoch": 0.2016,
|
| 448 |
+
"grad_norm": 0.28295912792439204,
|
| 449 |
+
"learning_rate": 4.8458165100433725e-06,
|
| 450 |
+
"loss": 0.2016925811767578,
|
| 451 |
+
"step": 630
|
| 452 |
+
},
|
| 453 |
+
{
|
| 454 |
+
"epoch": 0.2048,
|
| 455 |
+
"grad_norm": 0.2902924299127114,
|
| 456 |
+
"learning_rate": 4.836013432007738e-06,
|
| 457 |
+
"loss": 0.20164456367492675,
|
| 458 |
+
"step": 640
|
| 459 |
+
},
|
| 460 |
+
{
|
| 461 |
+
"epoch": 0.208,
|
| 462 |
+
"grad_norm": 0.24256417788990398,
|
| 463 |
+
"learning_rate": 4.825918785922921e-06,
|
| 464 |
+
"loss": 0.20648303031921386,
|
| 465 |
+
"step": 650
|
| 466 |
+
},
|
| 467 |
+
{
|
| 468 |
+
"epoch": 0.2112,
|
| 469 |
+
"grad_norm": 0.27122351891055063,
|
| 470 |
+
"learning_rate": 4.8155338317458315e-06,
|
| 471 |
+
"loss": 0.20356349945068358,
|
| 472 |
+
"step": 660
|
| 473 |
+
},
|
| 474 |
+
{
|
| 475 |
+
"epoch": 0.2144,
|
| 476 |
+
"grad_norm": 0.2600569122055766,
|
| 477 |
+
"learning_rate": 4.804859865668002e-06,
|
| 478 |
+
"loss": 0.19959055185317992,
|
| 479 |
+
"step": 670
|
| 480 |
+
},
|
| 481 |
+
{
|
| 482 |
+
"epoch": 0.2176,
|
| 483 |
+
"grad_norm": 0.25345624369635567,
|
| 484 |
+
"learning_rate": 4.793898219953804e-06,
|
| 485 |
+
"loss": 0.2007960557937622,
|
| 486 |
+
"step": 680
|
| 487 |
+
},
|
| 488 |
+
{
|
| 489 |
+
"epoch": 0.2208,
|
| 490 |
+
"grad_norm": 0.2544929334444299,
|
| 491 |
+
"learning_rate": 4.782650262774164e-06,
|
| 492 |
+
"loss": 0.20300769805908203,
|
| 493 |
+
"step": 690
|
| 494 |
+
},
|
| 495 |
+
{
|
| 496 |
+
"epoch": 0.224,
|
| 497 |
+
"grad_norm": 0.2897145189307127,
|
| 498 |
+
"learning_rate": 4.7711173980357886e-06,
|
| 499 |
+
"loss": 0.19880002737045288,
|
| 500 |
+
"step": 700
|
| 501 |
+
},
|
| 502 |
+
{
|
| 503 |
+
"epoch": 0.2272,
|
| 504 |
+
"grad_norm": 0.2560542526589546,
|
| 505 |
+
"learning_rate": 4.759301065205947e-06,
|
| 506 |
+
"loss": 0.19960763454437255,
|
| 507 |
+
"step": 710
|
| 508 |
+
},
|
| 509 |
+
{
|
| 510 |
+
"epoch": 0.2304,
|
| 511 |
+
"grad_norm": 0.3097914904022575,
|
| 512 |
+
"learning_rate": 4.7472027391328e-06,
|
| 513 |
+
"loss": 0.2003918170928955,
|
| 514 |
+
"step": 720
|
| 515 |
+
},
|
| 516 |
+
{
|
| 517 |
+
"epoch": 0.2336,
|
| 518 |
+
"grad_norm": 0.2887809607696432,
|
| 519 |
+
"learning_rate": 4.734823929861317e-06,
|
| 520 |
+
"loss": 0.20292911529541016,
|
| 521 |
+
"step": 730
|
| 522 |
+
},
|
| 523 |
+
{
|
| 524 |
+
"epoch": 0.2368,
|
| 525 |
+
"grad_norm": 0.28917619670340877,
|
| 526 |
+
"learning_rate": 4.722166182444801e-06,
|
| 527 |
+
"loss": 0.20004558563232422,
|
| 528 |
+
"step": 740
|
| 529 |
+
},
|
| 530 |
+
{
|
| 531 |
+
"epoch": 0.24,
|
| 532 |
+
"grad_norm": 0.27043264841658887,
|
| 533 |
+
"learning_rate": 4.709231076752045e-06,
|
| 534 |
+
"loss": 0.19843683242797852,
|
| 535 |
+
"step": 750
|
| 536 |
+
},
|
| 537 |
+
{
|
| 538 |
+
"epoch": 0.2432,
|
| 539 |
+
"grad_norm": 0.2645651727770741,
|
| 540 |
+
"learning_rate": 4.696020227270142e-06,
|
| 541 |
+
"loss": 0.20258240699768065,
|
| 542 |
+
"step": 760
|
| 543 |
+
},
|
| 544 |
+
{
|
| 545 |
+
"epoch": 0.2464,
|
| 546 |
+
"grad_norm": 0.2777282429222742,
|
| 547 |
+
"learning_rate": 4.6825352829029705e-06,
|
| 548 |
+
"loss": 0.1994302749633789,
|
| 549 |
+
"step": 770
|
| 550 |
+
},
|
| 551 |
+
{
|
| 552 |
+
"epoch": 0.2496,
|
| 553 |
+
"grad_norm": 0.28182340391383837,
|
| 554 |
+
"learning_rate": 4.668777926765392e-06,
|
| 555 |
+
"loss": 0.197939932346344,
|
| 556 |
+
"step": 780
|
| 557 |
+
},
|
| 558 |
+
{
|
| 559 |
+
"epoch": 0.2528,
|
| 560 |
+
"grad_norm": 0.2390403179508666,
|
| 561 |
+
"learning_rate": 4.6547498759731725e-06,
|
| 562 |
+
"loss": 0.19328031539916993,
|
| 563 |
+
"step": 790
|
| 564 |
+
},
|
| 565 |
+
{
|
| 566 |
+
"epoch": 0.256,
|
| 567 |
+
"grad_norm": 0.30761446053746666,
|
| 568 |
+
"learning_rate": 4.6404528814286575e-06,
|
| 569 |
+
"loss": 0.1962287425994873,
|
| 570 |
+
"step": 800
|
| 571 |
+
},
|
| 572 |
+
{
|
| 573 |
+
"epoch": 0.2592,
|
| 574 |
+
"grad_norm": 0.26058296777263723,
|
| 575 |
+
"learning_rate": 4.6258887276022425e-06,
|
| 576 |
+
"loss": 0.20304152965545655,
|
| 577 |
+
"step": 810
|
| 578 |
+
},
|
| 579 |
+
{
|
| 580 |
+
"epoch": 0.2624,
|
| 581 |
+
"grad_norm": 0.3023946784650888,
|
| 582 |
+
"learning_rate": 4.611059232309639e-06,
|
| 583 |
+
"loss": 0.19789116382598876,
|
| 584 |
+
"step": 820
|
| 585 |
+
},
|
| 586 |
+
{
|
| 587 |
+
"epoch": 0.2656,
|
| 588 |
+
"grad_norm": 0.28736962727648746,
|
| 589 |
+
"learning_rate": 4.595966246484986e-06,
|
| 590 |
+
"loss": 0.19968997240066527,
|
| 591 |
+
"step": 830
|
| 592 |
+
},
|
| 593 |
+
{
|
| 594 |
+
"epoch": 0.2688,
|
| 595 |
+
"grad_norm": 0.28571881200537336,
|
| 596 |
+
"learning_rate": 4.580611653949829e-06,
|
| 597 |
+
"loss": 0.20007586479187012,
|
| 598 |
+
"step": 840
|
| 599 |
+
},
|
| 600 |
+
{
|
| 601 |
+
"epoch": 0.272,
|
| 602 |
+
"grad_norm": 0.295019179491335,
|
| 603 |
+
"learning_rate": 4.564997371177992e-06,
|
| 604 |
+
"loss": 0.19763822555541993,
|
| 605 |
+
"step": 850
|
| 606 |
+
},
|
| 607 |
+
{
|
| 608 |
+
"epoch": 0.2752,
|
| 609 |
+
"grad_norm": 0.29653404936460237,
|
| 610 |
+
"learning_rate": 4.54912534705637e-06,
|
| 611 |
+
"loss": 0.19755616188049316,
|
| 612 |
+
"step": 860
|
| 613 |
+
},
|
| 614 |
+
{
|
| 615 |
+
"epoch": 0.2784,
|
| 616 |
+
"grad_norm": 0.2642449071502374,
|
| 617 |
+
"learning_rate": 4.532997562641683e-06,
|
| 618 |
+
"loss": 0.19439829587936402,
|
| 619 |
+
"step": 870
|
| 620 |
+
},
|
| 621 |
+
{
|
| 622 |
+
"epoch": 0.2816,
|
| 623 |
+
"grad_norm": 0.25657475126133233,
|
| 624 |
+
"learning_rate": 4.516616030913214e-06,
|
| 625 |
+
"loss": 0.1987127423286438,
|
| 626 |
+
"step": 880
|
| 627 |
+
},
|
| 628 |
+
{
|
| 629 |
+
"epoch": 0.2848,
|
| 630 |
+
"grad_norm": 0.28458590654874555,
|
| 631 |
+
"learning_rate": 4.499982796521556e-06,
|
| 632 |
+
"loss": 0.19352295398712158,
|
| 633 |
+
"step": 890
|
| 634 |
+
},
|
| 635 |
+
{
|
| 636 |
+
"epoch": 0.288,
|
| 637 |
+
"grad_norm": 0.2793448530701338,
|
| 638 |
+
"learning_rate": 4.48309993553341e-06,
|
| 639 |
+
"loss": 0.1959349274635315,
|
| 640 |
+
"step": 900
|
| 641 |
+
},
|
| 642 |
+
{
|
| 643 |
+
"epoch": 0.2912,
|
| 644 |
+
"grad_norm": 0.3163250873932861,
|
| 645 |
+
"learning_rate": 4.465969555172468e-06,
|
| 646 |
+
"loss": 0.1957021713256836,
|
| 647 |
+
"step": 910
|
| 648 |
+
},
|
| 649 |
+
{
|
| 650 |
+
"epoch": 0.2944,
|
| 651 |
+
"grad_norm": 0.2933329400631374,
|
| 652 |
+
"learning_rate": 4.448593793556391e-06,
|
| 653 |
+
"loss": 0.20156097412109375,
|
| 654 |
+
"step": 920
|
| 655 |
+
},
|
| 656 |
+
{
|
| 657 |
+
"epoch": 0.2976,
|
| 658 |
+
"grad_norm": 0.2688085579058971,
|
| 659 |
+
"learning_rate": 4.430974819429954e-06,
|
| 660 |
+
"loss": 0.1948945164680481,
|
| 661 |
+
"step": 930
|
| 662 |
+
},
|
| 663 |
+
{
|
| 664 |
+
"epoch": 0.3008,
|
| 665 |
+
"grad_norm": 0.28553708068341715,
|
| 666 |
+
"learning_rate": 4.413114831894344e-06,
|
| 667 |
+
"loss": 0.18995710611343383,
|
| 668 |
+
"step": 940
|
| 669 |
+
},
|
| 670 |
+
{
|
| 671 |
+
"epoch": 0.304,
|
| 672 |
+
"grad_norm": 0.26518275753825254,
|
| 673 |
+
"learning_rate": 4.3950160601326865e-06,
|
| 674 |
+
"loss": 0.18871839046478273,
|
| 675 |
+
"step": 950
|
| 676 |
+
},
|
| 677 |
+
{
|
| 678 |
+
"epoch": 0.3072,
|
| 679 |
+
"grad_norm": 0.28692003913342795,
|
| 680 |
+
"learning_rate": 4.376680763131811e-06,
|
| 681 |
+
"loss": 0.19533849954605104,
|
| 682 |
+
"step": 960
|
| 683 |
+
},
|
| 684 |
+
{
|
| 685 |
+
"epoch": 0.3104,
|
| 686 |
+
"grad_norm": 0.27227233815166896,
|
| 687 |
+
"learning_rate": 4.358111229400296e-06,
|
| 688 |
+
"loss": 0.19751427173614503,
|
| 689 |
+
"step": 970
|
| 690 |
+
},
|
| 691 |
+
{
|
| 692 |
+
"epoch": 0.3136,
|
| 693 |
+
"grad_norm": 0.27245831220598377,
|
| 694 |
+
"learning_rate": 4.33930977668283e-06,
|
| 695 |
+
"loss": 0.20111453533172607,
|
| 696 |
+
"step": 980
|
| 697 |
+
},
|
| 698 |
+
{
|
| 699 |
+
"epoch": 0.3168,
|
| 700 |
+
"grad_norm": 0.2482632152661181,
|
| 701 |
+
"learning_rate": 4.320278751670922e-06,
|
| 702 |
+
"loss": 0.19406617879867555,
|
| 703 |
+
"step": 990
|
| 704 |
+
},
|
| 705 |
+
{
|
| 706 |
+
"epoch": 0.32,
|
| 707 |
+
"grad_norm": 0.2892442073812178,
|
| 708 |
+
"learning_rate": 4.301020529710009e-06,
|
| 709 |
+
"loss": 0.19525597095489503,
|
| 710 |
+
"step": 1000
|
| 711 |
+
},
|
| 712 |
+
{
|
| 713 |
+
"epoch": 0.3232,
|
| 714 |
+
"grad_norm": 0.26392559431034407,
|
| 715 |
+
"learning_rate": 4.281537514502962e-06,
|
| 716 |
+
"loss": 0.19918107986450195,
|
| 717 |
+
"step": 1010
|
| 718 |
+
},
|
| 719 |
+
{
|
| 720 |
+
"epoch": 0.3264,
|
| 721 |
+
"grad_norm": 0.27003912401002855,
|
| 722 |
+
"learning_rate": 4.261832137810093e-06,
|
| 723 |
+
"loss": 0.1964997172355652,
|
| 724 |
+
"step": 1020
|
| 725 |
+
},
|
| 726 |
+
{
|
| 727 |
+
"epoch": 0.3296,
|
| 728 |
+
"grad_norm": 0.2664017566726753,
|
| 729 |
+
"learning_rate": 4.241906859145611e-06,
|
| 730 |
+
"loss": 0.19660145044326782,
|
| 731 |
+
"step": 1030
|
| 732 |
+
},
|
| 733 |
+
{
|
| 734 |
+
"epoch": 0.3328,
|
| 735 |
+
"grad_norm": 0.2744161118643581,
|
| 736 |
+
"learning_rate": 4.221764165470661e-06,
|
| 737 |
+
"loss": 0.1935626745223999,
|
| 738 |
+
"step": 1040
|
| 739 |
+
},
|
| 740 |
+
{
|
| 741 |
+
"epoch": 0.336,
|
| 742 |
+
"grad_norm": 0.2717693030089869,
|
| 743 |
+
"learning_rate": 4.201406570882898e-06,
|
| 744 |
+
"loss": 0.19286205768585205,
|
| 745 |
+
"step": 1050
|
| 746 |
+
},
|
| 747 |
+
{
|
| 748 |
+
"epoch": 0.3392,
|
| 749 |
+
"grad_norm": 0.259292524653773,
|
| 750 |
+
"learning_rate": 4.180836616302704e-06,
|
| 751 |
+
"loss": 0.1922353148460388,
|
| 752 |
+
"step": 1060
|
| 753 |
+
},
|
| 754 |
+
{
|
| 755 |
+
"epoch": 0.3424,
|
| 756 |
+
"grad_norm": 0.2739674960468982,
|
| 757 |
+
"learning_rate": 4.160056869156041e-06,
|
| 758 |
+
"loss": 0.19553282260894775,
|
| 759 |
+
"step": 1070
|
| 760 |
+
},
|
| 761 |
+
{
|
| 762 |
+
"epoch": 0.3456,
|
| 763 |
+
"grad_norm": 0.272965837223612,
|
| 764 |
+
"learning_rate": 4.139069923053995e-06,
|
| 765 |
+
"loss": 0.19367674589157105,
|
| 766 |
+
"step": 1080
|
| 767 |
+
},
|
| 768 |
+
{
|
| 769 |
+
"epoch": 0.3488,
|
| 770 |
+
"grad_norm": 0.2463436566122966,
|
| 771 |
+
"learning_rate": 4.117878397469062e-06,
|
| 772 |
+
"loss": 0.19772920608520508,
|
| 773 |
+
"step": 1090
|
| 774 |
+
},
|
| 775 |
+
{
|
| 776 |
+
"epoch": 0.352,
|
| 777 |
+
"grad_norm": 0.24672019869428047,
|
| 778 |
+
"learning_rate": 4.096484937408195e-06,
|
| 779 |
+
"loss": 0.1892393112182617,
|
| 780 |
+
"step": 1100
|
| 781 |
+
},
|
| 782 |
+
{
|
| 783 |
+
"epoch": 0.3552,
|
| 784 |
+
"grad_norm": 0.2673060417093708,
|
| 785 |
+
"learning_rate": 4.074892213082676e-06,
|
| 786 |
+
"loss": 0.1892371416091919,
|
| 787 |
+
"step": 1110
|
| 788 |
+
},
|
| 789 |
+
{
|
| 790 |
+
"epoch": 0.3584,
|
| 791 |
+
"grad_norm": 0.26767314750680543,
|
| 792 |
+
"learning_rate": 4.0531029195748265e-06,
|
| 793 |
+
"loss": 0.19717614650726317,
|
| 794 |
+
"step": 1120
|
| 795 |
+
},
|
| 796 |
+
{
|
| 797 |
+
"epoch": 0.3616,
|
| 798 |
+
"grad_norm": 0.2796524343786416,
|
| 799 |
+
"learning_rate": 4.03111977650163e-06,
|
| 800 |
+
"loss": 0.19503848552703856,
|
| 801 |
+
"step": 1130
|
| 802 |
+
},
|
| 803 |
+
{
|
| 804 |
+
"epoch": 0.3648,
|
| 805 |
+
"grad_norm": 0.2816284710404393,
|
| 806 |
+
"learning_rate": 4.008945527675281e-06,
|
| 807 |
+
"loss": 0.19529366493225098,
|
| 808 |
+
"step": 1140
|
| 809 |
+
},
|
| 810 |
+
{
|
| 811 |
+
"epoch": 0.368,
|
| 812 |
+
"grad_norm": 0.31949481569871324,
|
| 813 |
+
"learning_rate": 3.986582940760717e-06,
|
| 814 |
+
"loss": 0.18451136350631714,
|
| 815 |
+
"step": 1150
|
| 816 |
+
},
|
| 817 |
+
{
|
| 818 |
+
"epoch": 0.3712,
|
| 819 |
+
"grad_norm": 0.2723449306170863,
|
| 820 |
+
"learning_rate": 3.9640348069301785e-06,
|
| 821 |
+
"loss": 0.191510009765625,
|
| 822 |
+
"step": 1160
|
| 823 |
+
},
|
| 824 |
+
{
|
| 825 |
+
"epoch": 0.3744,
|
| 826 |
+
"grad_norm": 0.27747112521567696,
|
| 827 |
+
"learning_rate": 3.941303940514826e-06,
|
| 828 |
+
"loss": 0.19263410568237305,
|
| 829 |
+
"step": 1170
|
| 830 |
+
},
|
| 831 |
+
{
|
| 832 |
+
"epoch": 0.3776,
|
| 833 |
+
"grad_norm": 0.2719099807762723,
|
| 834 |
+
"learning_rate": 3.918393178653472e-06,
|
| 835 |
+
"loss": 0.19341590404510497,
|
| 836 |
+
"step": 1180
|
| 837 |
+
},
|
| 838 |
+
{
|
| 839 |
+
"epoch": 0.3808,
|
| 840 |
+
"grad_norm": 0.29074805846664115,
|
| 841 |
+
"learning_rate": 3.895305380938468e-06,
|
| 842 |
+
"loss": 0.19099385738372804,
|
| 843 |
+
"step": 1190
|
| 844 |
+
},
|
| 845 |
+
{
|
| 846 |
+
"epoch": 0.384,
|
| 847 |
+
"grad_norm": 0.2517462589595264,
|
| 848 |
+
"learning_rate": 3.872043429058783e-06,
|
| 849 |
+
"loss": 0.18874506950378417,
|
| 850 |
+
"step": 1200
|
| 851 |
+
},
|
| 852 |
+
{
|
| 853 |
+
"epoch": 0.3872,
|
| 854 |
+
"grad_norm": 0.2591827841853763,
|
| 855 |
+
"learning_rate": 3.84861022644033e-06,
|
| 856 |
+
"loss": 0.19069148302078248,
|
| 857 |
+
"step": 1210
|
| 858 |
+
},
|
| 859 |
+
{
|
| 860 |
+
"epoch": 0.3904,
|
| 861 |
+
"grad_norm": 0.2702770742629986,
|
| 862 |
+
"learning_rate": 3.825008697883574e-06,
|
| 863 |
+
"loss": 0.19928838014602662,
|
| 864 |
+
"step": 1220
|
| 865 |
+
},
|
| 866 |
+
{
|
| 867 |
+
"epoch": 0.3936,
|
| 868 |
+
"grad_norm": 0.27788866885326635,
|
| 869 |
+
"learning_rate": 3.8012417891984776e-06,
|
| 870 |
+
"loss": 0.19237933158874512,
|
| 871 |
+
"step": 1230
|
| 872 |
+
},
|
| 873 |
+
{
|
| 874 |
+
"epoch": 0.3968,
|
| 875 |
+
"grad_norm": 0.2656255469668472,
|
| 876 |
+
"learning_rate": 3.777312466836819e-06,
|
| 877 |
+
"loss": 0.19055767059326173,
|
| 878 |
+
"step": 1240
|
| 879 |
+
},
|
| 880 |
+
{
|
| 881 |
+
"epoch": 0.4,
|
| 882 |
+
"grad_norm": 0.28446496354107703,
|
| 883 |
+
"learning_rate": 3.7532237175219378e-06,
|
| 884 |
+
"loss": 0.18940582275390624,
|
| 885 |
+
"step": 1250
|
| 886 |
+
},
|
| 887 |
+
{
|
| 888 |
+
"epoch": 0.4032,
|
| 889 |
+
"grad_norm": 0.4152862546777316,
|
| 890 |
+
"learning_rate": 3.728978547875948e-06,
|
| 891 |
+
"loss": 0.19362914562225342,
|
| 892 |
+
"step": 1260
|
| 893 |
+
},
|
| 894 |
+
{
|
| 895 |
+
"epoch": 0.4064,
|
| 896 |
+
"grad_norm": 0.28537432061728957,
|
| 897 |
+
"learning_rate": 3.7045799840444712e-06,
|
| 898 |
+
"loss": 0.1886904716491699,
|
| 899 |
+
"step": 1270
|
| 900 |
+
},
|
| 901 |
+
{
|
| 902 |
+
"epoch": 0.4096,
|
| 903 |
+
"grad_norm": 0.29038310854731697,
|
| 904 |
+
"learning_rate": 3.6800310713189258e-06,
|
| 905 |
+
"loss": 0.18923617601394654,
|
| 906 |
+
"step": 1280
|
| 907 |
+
},
|
| 908 |
+
{
|
| 909 |
+
"epoch": 0.4128,
|
| 910 |
+
"grad_norm": 0.32132086585692904,
|
| 911 |
+
"learning_rate": 3.6553348737564328e-06,
|
| 912 |
+
"loss": 0.19005811214447021,
|
| 913 |
+
"step": 1290
|
| 914 |
+
},
|
| 915 |
+
{
|
| 916 |
+
"epoch": 0.416,
|
| 917 |
+
"grad_norm": 0.2669423345384319,
|
| 918 |
+
"learning_rate": 3.6304944737973794e-06,
|
| 919 |
+
"loss": 0.19575085639953613,
|
| 920 |
+
"step": 1300
|
| 921 |
+
},
|
| 922 |
+
{
|
| 923 |
+
"epoch": 0.4192,
|
| 924 |
+
"grad_norm": 0.28931030301965927,
|
| 925 |
+
"learning_rate": 3.6055129718806836e-06,
|
| 926 |
+
"loss": 0.18975239992141724,
|
| 927 |
+
"step": 1310
|
| 928 |
+
},
|
| 929 |
+
{
|
| 930 |
+
"epoch": 0.4224,
|
| 931 |
+
"grad_norm": 0.28948269391746034,
|
| 932 |
+
"learning_rate": 3.5803934860568134e-06,
|
| 933 |
+
"loss": 0.18510067462921143,
|
| 934 |
+
"step": 1320
|
| 935 |
+
},
|
| 936 |
+
{
|
| 937 |
+
"epoch": 0.4256,
|
| 938 |
+
"grad_norm": 0.2821484963772758,
|
| 939 |
+
"learning_rate": 3.5551391515986163e-06,
|
| 940 |
+
"loss": 0.1907583475112915,
|
| 941 |
+
"step": 1330
|
| 942 |
+
},
|
| 943 |
+
{
|
| 944 |
+
"epoch": 0.4288,
|
| 945 |
+
"grad_norm": 0.27423888046510925,
|
| 946 |
+
"learning_rate": 3.529753120609982e-06,
|
| 947 |
+
"loss": 0.18690071105957032,
|
| 948 |
+
"step": 1340
|
| 949 |
+
},
|
| 950 |
+
{
|
| 951 |
+
"epoch": 0.432,
|
| 952 |
+
"grad_norm": 0.30811658453814883,
|
| 953 |
+
"learning_rate": 3.5042385616324243e-06,
|
| 954 |
+
"loss": 0.19000139236450195,
|
| 955 |
+
"step": 1350
|
| 956 |
+
},
|
| 957 |
+
{
|
| 958 |
+
"epoch": 0.4352,
|
| 959 |
+
"grad_norm": 0.24402420223179272,
|
| 960 |
+
"learning_rate": 3.4785986592495934e-06,
|
| 961 |
+
"loss": 0.18803791999816893,
|
| 962 |
+
"step": 1360
|
| 963 |
+
},
|
| 964 |
+
{
|
| 965 |
+
"epoch": 0.4384,
|
| 966 |
+
"grad_norm": 0.24576039119812526,
|
| 967 |
+
"learning_rate": 3.452836613689803e-06,
|
| 968 |
+
"loss": 0.1866163969039917,
|
| 969 |
+
"step": 1370
|
| 970 |
+
},
|
| 971 |
+
{
|
| 972 |
+
"epoch": 0.4416,
|
| 973 |
+
"grad_norm": 0.2949022587874532,
|
| 974 |
+
"learning_rate": 3.426955640426584e-06,
|
| 975 |
+
"loss": 0.1890486001968384,
|
| 976 |
+
"step": 1380
|
| 977 |
+
},
|
| 978 |
+
{
|
| 979 |
+
"epoch": 0.4448,
|
| 980 |
+
"grad_norm": 0.2582182081996982,
|
| 981 |
+
"learning_rate": 3.4009589697773605e-06,
|
| 982 |
+
"loss": 0.18851635456085206,
|
| 983 |
+
"step": 1390
|
| 984 |
+
},
|
| 985 |
+
{
|
| 986 |
+
"epoch": 0.448,
|
| 987 |
+
"grad_norm": 0.2722482128131903,
|
| 988 |
+
"learning_rate": 3.3748498465002475e-06,
|
| 989 |
+
"loss": 0.18554195165634155,
|
| 990 |
+
"step": 1400
|
| 991 |
+
},
|
| 992 |
+
{
|
| 993 |
+
"epoch": 0.4512,
|
| 994 |
+
"grad_norm": 0.27484686642107964,
|
| 995 |
+
"learning_rate": 3.3486315293890693e-06,
|
| 996 |
+
"loss": 0.19425587654113768,
|
| 997 |
+
"step": 1410
|
| 998 |
+
},
|
| 999 |
+
{
|
| 1000 |
+
"epoch": 0.4544,
|
| 1001 |
+
"grad_norm": 0.28258316073925427,
|
| 1002 |
+
"learning_rate": 3.3223072908666053e-06,
|
| 1003 |
+
"loss": 0.1843653440475464,
|
| 1004 |
+
"step": 1420
|
| 1005 |
+
},
|
| 1006 |
+
{
|
| 1007 |
+
"epoch": 0.4576,
|
| 1008 |
+
"grad_norm": 0.28555979247115143,
|
| 1009 |
+
"learning_rate": 3.295880416576153e-06,
|
| 1010 |
+
"loss": 0.1941524863243103,
|
| 1011 |
+
"step": 1430
|
| 1012 |
+
},
|
| 1013 |
+
{
|
| 1014 |
+
"epoch": 0.4608,
|
| 1015 |
+
"grad_norm": 0.2969010932820601,
|
| 1016 |
+
"learning_rate": 3.269354204971427e-06,
|
| 1017 |
+
"loss": 0.18759560585021973,
|
| 1018 |
+
"step": 1440
|
| 1019 |
+
},
|
| 1020 |
+
{
|
| 1021 |
+
"epoch": 0.464,
|
| 1022 |
+
"grad_norm": 0.30795851200957197,
|
| 1023 |
+
"learning_rate": 3.242731966904865e-06,
|
| 1024 |
+
"loss": 0.18544803857803344,
|
| 1025 |
+
"step": 1450
|
| 1026 |
+
},
|
| 1027 |
+
{
|
| 1028 |
+
"epoch": 0.4672,
|
| 1029 |
+
"grad_norm": 0.28527072571260903,
|
| 1030 |
+
"learning_rate": 3.2160170252143913e-06,
|
| 1031 |
+
"loss": 0.18547136783599855,
|
| 1032 |
+
"step": 1460
|
| 1033 |
+
},
|
| 1034 |
+
{
|
| 1035 |
+
"epoch": 0.4704,
|
| 1036 |
+
"grad_norm": 0.2533866816866613,
|
| 1037 |
+
"learning_rate": 3.1892127143086716e-06,
|
| 1038 |
+
"loss": 0.19228132963180541,
|
| 1039 |
+
"step": 1470
|
| 1040 |
+
},
|
| 1041 |
+
{
|
| 1042 |
+
"epoch": 0.4736,
|
| 1043 |
+
"grad_norm": 0.2776942873045479,
|
| 1044 |
+
"learning_rate": 3.1623223797509347e-06,
|
| 1045 |
+
"loss": 0.1812342882156372,
|
| 1046 |
+
"step": 1480
|
| 1047 |
+
},
|
| 1048 |
+
{
|
| 1049 |
+
"epoch": 0.4768,
|
| 1050 |
+
"grad_norm": 0.2744584915099732,
|
| 1051 |
+
"learning_rate": 3.135349377841396e-06,
|
| 1052 |
+
"loss": 0.1853887915611267,
|
| 1053 |
+
"step": 1490
|
| 1054 |
+
},
|
| 1055 |
+
{
|
| 1056 |
+
"epoch": 0.48,
|
| 1057 |
+
"grad_norm": 0.2866639604297882,
|
| 1058 |
+
"learning_rate": 3.1082970751983497e-06,
|
| 1059 |
+
"loss": 0.1918737769126892,
|
| 1060 |
+
"step": 1500
|
| 1061 |
+
},
|
| 1062 |
+
{
|
| 1063 |
+
"epoch": 0.4832,
|
| 1064 |
+
"grad_norm": 0.26310322890713356,
|
| 1065 |
+
"learning_rate": 3.0811688483379546e-06,
|
| 1066 |
+
"loss": 0.18995790481567382,
|
| 1067 |
+
"step": 1510
|
| 1068 |
+
},
|
| 1069 |
+
{
|
| 1070 |
+
"epoch": 0.4864,
|
| 1071 |
+
"grad_norm": 0.28320054398109096,
|
| 1072 |
+
"learning_rate": 3.0539680832528074e-06,
|
| 1073 |
+
"loss": 0.18962399959564208,
|
| 1074 |
+
"step": 1520
|
| 1075 |
+
},
|
| 1076 |
+
{
|
| 1077 |
+
"epoch": 0.4896,
|
| 1078 |
+
"grad_norm": 0.2654570443815982,
|
| 1079 |
+
"learning_rate": 3.026698174989316e-06,
|
| 1080 |
+
"loss": 0.18734774589538575,
|
| 1081 |
+
"step": 1530
|
| 1082 |
+
},
|
| 1083 |
+
{
|
| 1084 |
+
"epoch": 0.4928,
|
| 1085 |
+
"grad_norm": 0.2658181920127404,
|
| 1086 |
+
"learning_rate": 2.999362527223952e-06,
|
| 1087 |
+
"loss": 0.1873406410217285,
|
| 1088 |
+
"step": 1540
|
| 1089 |
+
},
|
| 1090 |
+
{
|
| 1091 |
+
"epoch": 0.496,
|
| 1092 |
+
"grad_norm": 0.29250213703445505,
|
| 1093 |
+
"learning_rate": 2.9719645518384194e-06,
|
| 1094 |
+
"loss": 0.1892526626586914,
|
| 1095 |
+
"step": 1550
|
| 1096 |
+
},
|
| 1097 |
+
{
|
| 1098 |
+
"epoch": 0.4992,
|
| 1099 |
+
"grad_norm": 0.3090995402302473,
|
| 1100 |
+
"learning_rate": 2.944507668493807e-06,
|
| 1101 |
+
"loss": 0.19257349967956544,
|
| 1102 |
+
"step": 1560
|
| 1103 |
+
},
|
| 1104 |
+
{
|
| 1105 |
+
"epoch": 0.5024,
|
| 1106 |
+
"grad_norm": 0.28272052629438726,
|
| 1107 |
+
"learning_rate": 2.9169953042037623e-06,
|
| 1108 |
+
"loss": 0.18868753910064698,
|
| 1109 |
+
"step": 1570
|
| 1110 |
+
},
|
| 1111 |
+
{
|
| 1112 |
+
"epoch": 0.5056,
|
| 1113 |
+
"grad_norm": 0.3954198531333443,
|
| 1114 |
+
"learning_rate": 2.889430892906754e-06,
|
| 1115 |
+
"loss": 0.18459179401397705,
|
| 1116 |
+
"step": 1580
|
| 1117 |
+
},
|
| 1118 |
+
{
|
| 1119 |
+
"epoch": 0.5088,
|
| 1120 |
+
"grad_norm": 0.2563261821009193,
|
| 1121 |
+
"learning_rate": 2.861817875037462e-06,
|
| 1122 |
+
"loss": 0.18163517713546753,
|
| 1123 |
+
"step": 1590
|
| 1124 |
+
},
|
| 1125 |
+
{
|
| 1126 |
+
"epoch": 0.512,
|
| 1127 |
+
"grad_norm": 0.28115388072993086,
|
| 1128 |
+
"learning_rate": 2.8341596970973683e-06,
|
| 1129 |
+
"loss": 0.19087796211242675,
|
| 1130 |
+
"step": 1600
|
| 1131 |
+
},
|
| 1132 |
+
{
|
| 1133 |
+
"epoch": 0.5152,
|
| 1134 |
+
"grad_norm": 0.27079102831839946,
|
| 1135 |
+
"learning_rate": 2.80645981122458e-06,
|
| 1136 |
+
"loss": 0.1863863706588745,
|
| 1137 |
+
"step": 1610
|
| 1138 |
+
},
|
| 1139 |
+
{
|
| 1140 |
+
"epoch": 0.5184,
|
| 1141 |
+
"grad_norm": 0.27596423249252744,
|
| 1142 |
+
"learning_rate": 2.7787216747629508e-06,
|
| 1143 |
+
"loss": 0.19303735494613647,
|
| 1144 |
+
"step": 1620
|
| 1145 |
+
},
|
| 1146 |
+
{
|
| 1147 |
+
"epoch": 0.5216,
|
| 1148 |
+
"grad_norm": 0.2682301223547138,
|
| 1149 |
+
"learning_rate": 2.7509487498305615e-06,
|
| 1150 |
+
"loss": 0.18045294284820557,
|
| 1151 |
+
"step": 1630
|
| 1152 |
+
},
|
| 1153 |
+
{
|
| 1154 |
+
"epoch": 0.5248,
|
| 1155 |
+
"grad_norm": 0.27817197846381203,
|
| 1156 |
+
"learning_rate": 2.7231445028875924e-06,
|
| 1157 |
+
"loss": 0.18653267621994019,
|
| 1158 |
+
"step": 1640
|
| 1159 |
+
},
|
| 1160 |
+
{
|
| 1161 |
+
"epoch": 0.528,
|
| 1162 |
+
"grad_norm": 0.25176165708531945,
|
| 1163 |
+
"learning_rate": 2.6953124043036604e-06,
|
| 1164 |
+
"loss": 0.18530716896057128,
|
| 1165 |
+
"step": 1650
|
| 1166 |
+
},
|
| 1167 |
+
{
|
| 1168 |
+
"epoch": 0.5312,
|
| 1169 |
+
"grad_norm": 0.272299195118528,
|
| 1170 |
+
"learning_rate": 2.667455927924667e-06,
|
| 1171 |
+
"loss": 0.18495219945907593,
|
| 1172 |
+
"step": 1660
|
| 1173 |
+
},
|
| 1174 |
+
{
|
| 1175 |
+
"epoch": 0.5344,
|
| 1176 |
+
"grad_norm": 0.26513870922757315,
|
| 1177 |
+
"learning_rate": 2.6395785506392164e-06,
|
| 1178 |
+
"loss": 0.18016864061355592,
|
| 1179 |
+
"step": 1670
|
| 1180 |
+
},
|
| 1181 |
+
{
|
| 1182 |
+
"epoch": 0.5376,
|
| 1183 |
+
"grad_norm": 0.26899577641448663,
|
| 1184 |
+
"learning_rate": 2.6116837519446407e-06,
|
| 1185 |
+
"loss": 0.18437364101409912,
|
| 1186 |
+
"step": 1680
|
| 1187 |
+
},
|
| 1188 |
+
{
|
| 1189 |
+
"epoch": 0.5408,
|
| 1190 |
+
"grad_norm": 0.29589553270345376,
|
| 1191 |
+
"learning_rate": 2.5837750135127192e-06,
|
| 1192 |
+
"loss": 0.18141529560089112,
|
| 1193 |
+
"step": 1690
|
| 1194 |
+
},
|
| 1195 |
+
{
|
| 1196 |
+
"epoch": 0.544,
|
| 1197 |
+
"grad_norm": 0.28180995392351926,
|
| 1198 |
+
"learning_rate": 2.555855818755108e-06,
|
| 1199 |
+
"loss": 0.18680166006088256,
|
| 1200 |
+
"step": 1700
|
| 1201 |
+
},
|
| 1202 |
+
{
|
| 1203 |
+
"epoch": 0.5472,
|
| 1204 |
+
"grad_norm": 0.29608650413456306,
|
| 1205 |
+
"learning_rate": 2.5279296523885636e-06,
|
| 1206 |
+
"loss": 0.18486298322677613,
|
| 1207 |
+
"step": 1710
|
| 1208 |
+
},
|
| 1209 |
+
{
|
| 1210 |
+
"epoch": 0.5504,
|
| 1211 |
+
"grad_norm": 0.28475957723655715,
|
| 1212 |
+
"learning_rate": 2.5e-06,
|
| 1213 |
+
"loss": 0.1850725531578064,
|
| 1214 |
+
"step": 1720
|
| 1215 |
+
},
|
| 1216 |
+
{
|
| 1217 |
+
"epoch": 0.5536,
|
| 1218 |
+
"grad_norm": 0.27856833997611247,
|
| 1219 |
+
"learning_rate": 2.472070347611437e-06,
|
| 1220 |
+
"loss": 0.1791991949081421,
|
| 1221 |
+
"step": 1730
|
| 1222 |
+
},
|
| 1223 |
+
{
|
| 1224 |
+
"epoch": 0.5568,
|
| 1225 |
+
"grad_norm": 0.30516489860119894,
|
| 1226 |
+
"learning_rate": 2.444144181244893e-06,
|
| 1227 |
+
"loss": 0.18483606576919556,
|
| 1228 |
+
"step": 1740
|
| 1229 |
+
},
|
| 1230 |
+
{
|
| 1231 |
+
"epoch": 0.56,
|
| 1232 |
+
"grad_norm": 0.29804656625996045,
|
| 1233 |
+
"learning_rate": 2.416224986487282e-06,
|
| 1234 |
+
"loss": 0.18195321559906005,
|
| 1235 |
+
"step": 1750
|
| 1236 |
+
},
|
| 1237 |
+
{
|
| 1238 |
+
"epoch": 0.5632,
|
| 1239 |
+
"grad_norm": 0.30740179095263215,
|
| 1240 |
+
"learning_rate": 2.3883162480553605e-06,
|
| 1241 |
+
"loss": 0.17964634895324708,
|
| 1242 |
+
"step": 1760
|
| 1243 |
+
},
|
| 1244 |
+
{
|
| 1245 |
+
"epoch": 0.5664,
|
| 1246 |
+
"grad_norm": 0.29672245353605753,
|
| 1247 |
+
"learning_rate": 2.3604214493607844e-06,
|
| 1248 |
+
"loss": 0.18308933973312377,
|
| 1249 |
+
"step": 1770
|
| 1250 |
+
},
|
| 1251 |
+
{
|
| 1252 |
+
"epoch": 0.5696,
|
| 1253 |
+
"grad_norm": 0.2837212145176832,
|
| 1254 |
+
"learning_rate": 2.332544072075333e-06,
|
| 1255 |
+
"loss": 0.18688681125640869,
|
| 1256 |
+
"step": 1780
|
| 1257 |
+
},
|
| 1258 |
+
{
|
| 1259 |
+
"epoch": 0.5728,
|
| 1260 |
+
"grad_norm": 0.28451872958084823,
|
| 1261 |
+
"learning_rate": 2.30468759569634e-06,
|
| 1262 |
+
"loss": 0.18532857894897461,
|
| 1263 |
+
"step": 1790
|
| 1264 |
+
},
|
| 1265 |
+
{
|
| 1266 |
+
"epoch": 0.576,
|
| 1267 |
+
"grad_norm": 0.29734825652467917,
|
| 1268 |
+
"learning_rate": 2.276855497112408e-06,
|
| 1269 |
+
"loss": 0.18262310028076173,
|
| 1270 |
+
"step": 1800
|
| 1271 |
+
},
|
| 1272 |
+
{
|
| 1273 |
+
"epoch": 0.5792,
|
| 1274 |
+
"grad_norm": 0.3012944650683003,
|
| 1275 |
+
"learning_rate": 2.2490512501694394e-06,
|
| 1276 |
+
"loss": 0.17781586647033693,
|
| 1277 |
+
"step": 1810
|
| 1278 |
+
},
|
| 1279 |
+
{
|
| 1280 |
+
"epoch": 0.5824,
|
| 1281 |
+
"grad_norm": 0.2692920477116042,
|
| 1282 |
+
"learning_rate": 2.2212783252370496e-06,
|
| 1283 |
+
"loss": 0.18318163156509398,
|
| 1284 |
+
"step": 1820
|
| 1285 |
+
},
|
| 1286 |
+
{
|
| 1287 |
+
"epoch": 0.5856,
|
| 1288 |
+
"grad_norm": 0.2700619255739624,
|
| 1289 |
+
"learning_rate": 2.1935401887754213e-06,
|
| 1290 |
+
"loss": 0.18857367038726808,
|
| 1291 |
+
"step": 1830
|
| 1292 |
+
},
|
| 1293 |
+
{
|
| 1294 |
+
"epoch": 0.5888,
|
| 1295 |
+
"grad_norm": 0.2868516489290536,
|
| 1296 |
+
"learning_rate": 2.165840302902632e-06,
|
| 1297 |
+
"loss": 0.18190672397613525,
|
| 1298 |
+
"step": 1840
|
| 1299 |
+
},
|
| 1300 |
+
{
|
| 1301 |
+
"epoch": 0.592,
|
| 1302 |
+
"grad_norm": 0.28726300225812107,
|
| 1303 |
+
"learning_rate": 2.1381821249625383e-06,
|
| 1304 |
+
"loss": 0.1867521286010742,
|
| 1305 |
+
"step": 1850
|
| 1306 |
+
},
|
| 1307 |
+
{
|
| 1308 |
+
"epoch": 0.5952,
|
| 1309 |
+
"grad_norm": 0.2995145996099388,
|
| 1310 |
+
"learning_rate": 2.1105691070932465e-06,
|
| 1311 |
+
"loss": 0.17851842641830445,
|
| 1312 |
+
"step": 1860
|
| 1313 |
+
},
|
| 1314 |
+
{
|
| 1315 |
+
"epoch": 0.5984,
|
| 1316 |
+
"grad_norm": 0.28575212768410063,
|
| 1317 |
+
"learning_rate": 2.083004695796238e-06,
|
| 1318 |
+
"loss": 0.17741835117340088,
|
| 1319 |
+
"step": 1870
|
| 1320 |
+
},
|
| 1321 |
+
{
|
| 1322 |
+
"epoch": 0.6016,
|
| 1323 |
+
"grad_norm": 0.31284763297048707,
|
| 1324 |
+
"learning_rate": 2.055492331506194e-06,
|
| 1325 |
+
"loss": 0.1843113422393799,
|
| 1326 |
+
"step": 1880
|
| 1327 |
+
},
|
| 1328 |
+
{
|
| 1329 |
+
"epoch": 0.6048,
|
| 1330 |
+
"grad_norm": 0.3170666816206652,
|
| 1331 |
+
"learning_rate": 2.0280354481615814e-06,
|
| 1332 |
+
"loss": 0.18248820304870605,
|
| 1333 |
+
"step": 1890
|
| 1334 |
+
},
|
| 1335 |
+
{
|
| 1336 |
+
"epoch": 0.608,
|
| 1337 |
+
"grad_norm": 0.30950907311465886,
|
| 1338 |
+
"learning_rate": 2.000637472776049e-06,
|
| 1339 |
+
"loss": 0.1839754819869995,
|
| 1340 |
+
"step": 1900
|
| 1341 |
+
},
|
| 1342 |
+
{
|
| 1343 |
+
"epoch": 0.6112,
|
| 1344 |
+
"grad_norm": 0.2536972696685391,
|
| 1345 |
+
"learning_rate": 1.973301825010685e-06,
|
| 1346 |
+
"loss": 0.17841637134552002,
|
| 1347 |
+
"step": 1910
|
| 1348 |
+
},
|
| 1349 |
+
{
|
| 1350 |
+
"epoch": 0.6144,
|
| 1351 |
+
"grad_norm": 0.291862692607901,
|
| 1352 |
+
"learning_rate": 1.9460319167471934e-06,
|
| 1353 |
+
"loss": 0.18339977264404297,
|
| 1354 |
+
"step": 1920
|
| 1355 |
+
},
|
| 1356 |
+
{
|
| 1357 |
+
"epoch": 0.6176,
|
| 1358 |
+
"grad_norm": 0.2848109477155621,
|
| 1359 |
+
"learning_rate": 1.9188311516620466e-06,
|
| 1360 |
+
"loss": 0.17915148735046388,
|
| 1361 |
+
"step": 1930
|
| 1362 |
+
},
|
| 1363 |
+
{
|
| 1364 |
+
"epoch": 0.6208,
|
| 1365 |
+
"grad_norm": 0.3060077712638729,
|
| 1366 |
+
"learning_rate": 1.891702924801651e-06,
|
| 1367 |
+
"loss": 0.1848907709121704,
|
| 1368 |
+
"step": 1940
|
| 1369 |
+
},
|
| 1370 |
+
{
|
| 1371 |
+
"epoch": 0.624,
|
| 1372 |
+
"grad_norm": 0.27297816434517674,
|
| 1373 |
+
"learning_rate": 1.864650622158604e-06,
|
| 1374 |
+
"loss": 0.18888840675354004,
|
| 1375 |
+
"step": 1950
|
| 1376 |
+
},
|
| 1377 |
+
{
|
| 1378 |
+
"epoch": 0.6272,
|
| 1379 |
+
"grad_norm": 0.2781302448691454,
|
| 1380 |
+
"learning_rate": 1.8376776202490666e-06,
|
| 1381 |
+
"loss": 0.1847243309020996,
|
| 1382 |
+
"step": 1960
|
| 1383 |
+
},
|
| 1384 |
+
{
|
| 1385 |
+
"epoch": 0.6304,
|
| 1386 |
+
"grad_norm": 0.31527749144779466,
|
| 1387 |
+
"learning_rate": 1.8107872856913293e-06,
|
| 1388 |
+
"loss": 0.17888798713684081,
|
| 1389 |
+
"step": 1970
|
| 1390 |
+
},
|
| 1391 |
+
{
|
| 1392 |
+
"epoch": 0.6336,
|
| 1393 |
+
"grad_norm": 0.2981389294211551,
|
| 1394 |
+
"learning_rate": 1.7839829747856096e-06,
|
| 1395 |
+
"loss": 0.18081605434417725,
|
| 1396 |
+
"step": 1980
|
| 1397 |
+
},
|
| 1398 |
+
{
|
| 1399 |
+
"epoch": 0.6368,
|
| 1400 |
+
"grad_norm": 0.29438595992497246,
|
| 1401 |
+
"learning_rate": 1.7572680330951359e-06,
|
| 1402 |
+
"loss": 0.17975808382034303,
|
| 1403 |
+
"step": 1990
|
| 1404 |
+
},
|
| 1405 |
+
{
|
| 1406 |
+
"epoch": 0.64,
|
| 1407 |
+
"grad_norm": 0.2777422843592099,
|
| 1408 |
+
"learning_rate": 1.7306457950285747e-06,
|
| 1409 |
+
"loss": 0.1812159538269043,
|
| 1410 |
+
"step": 2000
|
| 1411 |
+
},
|
| 1412 |
+
{
|
| 1413 |
+
"epoch": 0.6432,
|
| 1414 |
+
"grad_norm": 0.3068388373590525,
|
| 1415 |
+
"learning_rate": 1.704119583423848e-06,
|
| 1416 |
+
"loss": 0.17536230087280275,
|
| 1417 |
+
"step": 2010
|
| 1418 |
+
},
|
| 1419 |
+
{
|
| 1420 |
+
"epoch": 0.6464,
|
| 1421 |
+
"grad_norm": 0.272885194568128,
|
| 1422 |
+
"learning_rate": 1.677692709133396e-06,
|
| 1423 |
+
"loss": 0.18365554809570311,
|
| 1424 |
+
"step": 2020
|
| 1425 |
+
},
|
| 1426 |
+
{
|
| 1427 |
+
"epoch": 0.6496,
|
| 1428 |
+
"grad_norm": 0.3023336412584975,
|
| 1429 |
+
"learning_rate": 1.6513684706109311e-06,
|
| 1430 |
+
"loss": 0.18368566036224365,
|
| 1431 |
+
"step": 2030
|
| 1432 |
+
},
|
| 1433 |
+
{
|
| 1434 |
+
"epoch": 0.6528,
|
| 1435 |
+
"grad_norm": 0.28988866387653284,
|
| 1436 |
+
"learning_rate": 1.6251501534997529e-06,
|
| 1437 |
+
"loss": 0.18175660371780394,
|
| 1438 |
+
"step": 2040
|
| 1439 |
+
},
|
| 1440 |
+
{
|
| 1441 |
+
"epoch": 0.656,
|
| 1442 |
+
"grad_norm": 0.28123365590903454,
|
| 1443 |
+
"learning_rate": 1.5990410302226405e-06,
|
| 1444 |
+
"loss": 0.17483808994293212,
|
| 1445 |
+
"step": 2050
|
| 1446 |
+
},
|
| 1447 |
+
{
|
| 1448 |
+
"epoch": 0.6592,
|
| 1449 |
+
"grad_norm": 0.28187049939921544,
|
| 1450 |
+
"learning_rate": 1.5730443595734162e-06,
|
| 1451 |
+
"loss": 0.18124582767486572,
|
| 1452 |
+
"step": 2060
|
| 1453 |
+
},
|
| 1454 |
+
{
|
| 1455 |
+
"epoch": 0.6624,
|
| 1456 |
+
"grad_norm": 0.31643189708694724,
|
| 1457 |
+
"learning_rate": 1.5471633863101982e-06,
|
| 1458 |
+
"loss": 0.18188211917877198,
|
| 1459 |
+
"step": 2070
|
| 1460 |
+
},
|
| 1461 |
+
{
|
| 1462 |
+
"epoch": 0.6656,
|
| 1463 |
+
"grad_norm": 0.3071146379480691,
|
| 1464 |
+
"learning_rate": 1.521401340750407e-06,
|
| 1465 |
+
"loss": 0.18458983898162842,
|
| 1466 |
+
"step": 2080
|
| 1467 |
+
},
|
| 1468 |
+
{
|
| 1469 |
+
"epoch": 0.6688,
|
| 1470 |
+
"grad_norm": 0.30923765962914507,
|
| 1471 |
+
"learning_rate": 1.495761438367577e-06,
|
| 1472 |
+
"loss": 0.18291953802108765,
|
| 1473 |
+
"step": 2090
|
| 1474 |
+
},
|
| 1475 |
+
{
|
| 1476 |
+
"epoch": 0.672,
|
| 1477 |
+
"grad_norm": 0.31506268222239586,
|
| 1478 |
+
"learning_rate": 1.4702468793900187e-06,
|
| 1479 |
+
"loss": 0.18112607002258302,
|
| 1480 |
+
"step": 2100
|
| 1481 |
+
},
|
| 1482 |
+
{
|
| 1483 |
+
"epoch": 0.6752,
|
| 1484 |
+
"grad_norm": 0.2991031913192095,
|
| 1485 |
+
"learning_rate": 1.444860848401384e-06,
|
| 1486 |
+
"loss": 0.18132129907608033,
|
| 1487 |
+
"step": 2110
|
| 1488 |
+
},
|
| 1489 |
+
{
|
| 1490 |
+
"epoch": 0.6784,
|
| 1491 |
+
"grad_norm": 0.306957825954438,
|
| 1492 |
+
"learning_rate": 1.4196065139431866e-06,
|
| 1493 |
+
"loss": 0.18091821670532227,
|
| 1494 |
+
"step": 2120
|
| 1495 |
+
},
|
| 1496 |
+
{
|
| 1497 |
+
"epoch": 0.6816,
|
| 1498 |
+
"grad_norm": 0.30984784981623864,
|
| 1499 |
+
"learning_rate": 1.3944870281193178e-06,
|
| 1500 |
+
"loss": 0.17975277900695802,
|
| 1501 |
+
"step": 2130
|
| 1502 |
+
},
|
| 1503 |
+
{
|
| 1504 |
+
"epoch": 0.6848,
|
| 1505 |
+
"grad_norm": 0.33685631116321924,
|
| 1506 |
+
"learning_rate": 1.3695055262026208e-06,
|
| 1507 |
+
"loss": 0.18606040477752686,
|
| 1508 |
+
"step": 2140
|
| 1509 |
+
},
|
| 1510 |
+
{
|
| 1511 |
+
"epoch": 0.688,
|
| 1512 |
+
"grad_norm": 0.28362188085343176,
|
| 1513 |
+
"learning_rate": 1.3446651262435679e-06,
|
| 1514 |
+
"loss": 0.17845985889434815,
|
| 1515 |
+
"step": 2150
|
| 1516 |
+
},
|
| 1517 |
+
{
|
| 1518 |
+
"epoch": 0.6912,
|
| 1519 |
+
"grad_norm": 0.28046286761312267,
|
| 1520 |
+
"learning_rate": 1.3199689286810746e-06,
|
| 1521 |
+
"loss": 0.18048195838928222,
|
| 1522 |
+
"step": 2160
|
| 1523 |
+
},
|
| 1524 |
+
{
|
| 1525 |
+
"epoch": 0.6944,
|
| 1526 |
+
"grad_norm": 0.29900090645940436,
|
| 1527 |
+
"learning_rate": 1.2954200159555294e-06,
|
| 1528 |
+
"loss": 0.17538446187973022,
|
| 1529 |
+
"step": 2170
|
| 1530 |
+
},
|
| 1531 |
+
{
|
| 1532 |
+
"epoch": 0.6976,
|
| 1533 |
+
"grad_norm": 0.32576508972663926,
|
| 1534 |
+
"learning_rate": 1.2710214521240527e-06,
|
| 1535 |
+
"loss": 0.18001599311828614,
|
| 1536 |
+
"step": 2180
|
| 1537 |
+
},
|
| 1538 |
+
{
|
| 1539 |
+
"epoch": 0.7008,
|
| 1540 |
+
"grad_norm": 0.30869890145158635,
|
| 1541 |
+
"learning_rate": 1.246776282478063e-06,
|
| 1542 |
+
"loss": 0.18135268688201905,
|
| 1543 |
+
"step": 2190
|
| 1544 |
+
},
|
| 1545 |
+
{
|
| 1546 |
+
"epoch": 0.704,
|
| 1547 |
+
"grad_norm": 0.28612747319198,
|
| 1548 |
+
"learning_rate": 1.222687533163181e-06,
|
| 1549 |
+
"loss": 0.18038851022720337,
|
| 1550 |
+
"step": 2200
|
| 1551 |
+
},
|
| 1552 |
+
{
|
| 1553 |
+
"epoch": 0.7072,
|
| 1554 |
+
"grad_norm": 0.32303440375726766,
|
| 1555 |
+
"learning_rate": 1.1987582108015228e-06,
|
| 1556 |
+
"loss": 0.18109045028686524,
|
| 1557 |
+
"step": 2210
|
| 1558 |
+
},
|
| 1559 |
+
{
|
| 1560 |
+
"epoch": 0.7104,
|
| 1561 |
+
"grad_norm": 0.3093047688685527,
|
| 1562 |
+
"learning_rate": 1.1749913021164255e-06,
|
| 1563 |
+
"loss": 0.18254566192626953,
|
| 1564 |
+
"step": 2220
|
| 1565 |
+
},
|
| 1566 |
+
{
|
| 1567 |
+
"epoch": 0.7136,
|
| 1568 |
+
"grad_norm": 0.2882548432858515,
|
| 1569 |
+
"learning_rate": 1.1513897735596702e-06,
|
| 1570 |
+
"loss": 0.17732615470886232,
|
| 1571 |
+
"step": 2230
|
| 1572 |
+
},
|
| 1573 |
+
{
|
| 1574 |
+
"epoch": 0.7168,
|
| 1575 |
+
"grad_norm": 0.29445166285798274,
|
| 1576 |
+
"learning_rate": 1.127956570941218e-06,
|
| 1577 |
+
"loss": 0.17425966262817383,
|
| 1578 |
+
"step": 2240
|
| 1579 |
+
},
|
| 1580 |
+
{
|
| 1581 |
+
"epoch": 0.72,
|
| 1582 |
+
"grad_norm": 0.3514589237334647,
|
| 1583 |
+
"learning_rate": 1.104694619061533e-06,
|
| 1584 |
+
"loss": 0.18296418190002442,
|
| 1585 |
+
"step": 2250
|
| 1586 |
+
},
|
| 1587 |
+
{
|
| 1588 |
+
"epoch": 0.7232,
|
| 1589 |
+
"grad_norm": 0.32323021290499837,
|
| 1590 |
+
"learning_rate": 1.0816068213465295e-06,
|
| 1591 |
+
"loss": 0.1851881265640259,
|
| 1592 |
+
"step": 2260
|
| 1593 |
+
},
|
| 1594 |
+
{
|
| 1595 |
+
"epoch": 0.7264,
|
| 1596 |
+
"grad_norm": 0.30421571681673176,
|
| 1597 |
+
"learning_rate": 1.0586960594851762e-06,
|
| 1598 |
+
"loss": 0.180436372756958,
|
| 1599 |
+
"step": 2270
|
| 1600 |
+
},
|
| 1601 |
+
{
|
| 1602 |
+
"epoch": 0.7296,
|
| 1603 |
+
"grad_norm": 0.31911631321578676,
|
| 1604 |
+
"learning_rate": 1.0359651930698217e-06,
|
| 1605 |
+
"loss": 0.17929892539978026,
|
| 1606 |
+
"step": 2280
|
| 1607 |
+
},
|
| 1608 |
+
{
|
| 1609 |
+
"epoch": 0.7328,
|
| 1610 |
+
"grad_norm": 0.30015899620754233,
|
| 1611 |
+
"learning_rate": 1.0134170592392837e-06,
|
| 1612 |
+
"loss": 0.18022915124893188,
|
| 1613 |
+
"step": 2290
|
| 1614 |
+
},
|
| 1615 |
+
{
|
| 1616 |
+
"epoch": 0.736,
|
| 1617 |
+
"grad_norm": 0.31786084969492157,
|
| 1618 |
+
"learning_rate": 9.910544723247204e-07,
|
| 1619 |
+
"loss": 0.17959039211273192,
|
| 1620 |
+
"step": 2300
|
| 1621 |
+
},
|
| 1622 |
+
{
|
| 1623 |
+
"epoch": 0.7392,
|
| 1624 |
+
"grad_norm": 0.31599364626026827,
|
| 1625 |
+
"learning_rate": 9.688802234983706e-07,
|
| 1626 |
+
"loss": 0.17806137800216676,
|
| 1627 |
+
"step": 2310
|
| 1628 |
+
},
|
| 1629 |
+
{
|
| 1630 |
+
"epoch": 0.7424,
|
| 1631 |
+
"grad_norm": 0.3303243768736776,
|
| 1632 |
+
"learning_rate": 9.468970804251742e-07,
|
| 1633 |
+
"loss": 0.1811964988708496,
|
| 1634 |
+
"step": 2320
|
| 1635 |
+
},
|
| 1636 |
+
{
|
| 1637 |
+
"epoch": 0.7456,
|
| 1638 |
+
"grad_norm": 0.3312986961423255,
|
| 1639 |
+
"learning_rate": 9.251077869173244e-07,
|
| 1640 |
+
"loss": 0.17583439350128174,
|
| 1641 |
+
"step": 2330
|
| 1642 |
+
},
|
| 1643 |
+
{
|
| 1644 |
+
"epoch": 0.7488,
|
| 1645 |
+
"grad_norm": 0.30030412592967864,
|
| 1646 |
+
"learning_rate": 9.035150625918054e-07,
|
| 1647 |
+
"loss": 0.17623555660247803,
|
| 1648 |
+
"step": 2340
|
| 1649 |
+
},
|
| 1650 |
+
{
|
| 1651 |
+
"epoch": 0.752,
|
| 1652 |
+
"grad_norm": 0.3177646626866783,
|
| 1653 |
+
"learning_rate": 8.821216025309395e-07,
|
| 1654 |
+
"loss": 0.18003884553909302,
|
| 1655 |
+
"step": 2350
|
| 1656 |
+
},
|
| 1657 |
+
{
|
| 1658 |
+
"epoch": 0.7552,
|
| 1659 |
+
"grad_norm": 0.3012142976429357,
|
| 1660 |
+
"learning_rate": 8.609300769460055e-07,
|
| 1661 |
+
"loss": 0.17543296813964843,
|
| 1662 |
+
"step": 2360
|
| 1663 |
+
},
|
| 1664 |
+
{
|
| 1665 |
+
"epoch": 0.7584,
|
| 1666 |
+
"grad_norm": 0.3177168816443014,
|
| 1667 |
+
"learning_rate": 8.399431308439592e-07,
|
| 1668 |
+
"loss": 0.18021781444549562,
|
| 1669 |
+
"step": 2370
|
| 1670 |
+
},
|
| 1671 |
+
{
|
| 1672 |
+
"epoch": 0.7616,
|
| 1673 |
+
"grad_norm": 0.34248252589513506,
|
| 1674 |
+
"learning_rate": 8.191633836972962e-07,
|
| 1675 |
+
"loss": 0.18125417232513427,
|
| 1676 |
+
"step": 2380
|
| 1677 |
+
},
|
| 1678 |
+
{
|
| 1679 |
+
"epoch": 0.7648,
|
| 1680 |
+
"grad_norm": 0.29292480325152365,
|
| 1681 |
+
"learning_rate": 7.985934291171024e-07,
|
| 1682 |
+
"loss": 0.17757056951522826,
|
| 1683 |
+
"step": 2390
|
| 1684 |
+
},
|
| 1685 |
+
{
|
| 1686 |
+
"epoch": 0.768,
|
| 1687 |
+
"grad_norm": 0.3257764859746147,
|
| 1688 |
+
"learning_rate": 7.7823583452934e-07,
|
| 1689 |
+
"loss": 0.18096057176589966,
|
| 1690 |
+
"step": 2400
|
| 1691 |
+
},
|
| 1692 |
+
{
|
| 1693 |
+
"epoch": 0.7712,
|
| 1694 |
+
"grad_norm": 0.28892062916284306,
|
| 1695 |
+
"learning_rate": 7.58093140854389e-07,
|
| 1696 |
+
"loss": 0.18015010356903077,
|
| 1697 |
+
"step": 2410
|
| 1698 |
+
},
|
| 1699 |
+
{
|
| 1700 |
+
"epoch": 0.7744,
|
| 1701 |
+
"grad_norm": 0.32360358107292697,
|
| 1702 |
+
"learning_rate": 7.381678621899077e-07,
|
| 1703 |
+
"loss": 0.18067935705184937,
|
| 1704 |
+
"step": 2420
|
| 1705 |
+
},
|
| 1706 |
+
{
|
| 1707 |
+
"epoch": 0.7776,
|
| 1708 |
+
"grad_norm": 0.3139428787829718,
|
| 1709 |
+
"learning_rate": 7.184624854970379e-07,
|
| 1710 |
+
"loss": 0.1768512487411499,
|
| 1711 |
+
"step": 2430
|
| 1712 |
+
},
|
| 1713 |
+
{
|
| 1714 |
+
"epoch": 0.7808,
|
| 1715 |
+
"grad_norm": 0.3182311104789415,
|
| 1716 |
+
"learning_rate": 6.989794702899932e-07,
|
| 1717 |
+
"loss": 0.17589566707611085,
|
| 1718 |
+
"step": 2440
|
| 1719 |
+
},
|
| 1720 |
+
{
|
| 1721 |
+
"epoch": 0.784,
|
| 1722 |
+
"grad_norm": 0.3112954733861784,
|
| 1723 |
+
"learning_rate": 6.797212483290777e-07,
|
| 1724 |
+
"loss": 0.177903413772583,
|
| 1725 |
+
"step": 2450
|
| 1726 |
+
},
|
| 1727 |
+
{
|
| 1728 |
+
"epoch": 0.7872,
|
| 1729 |
+
"grad_norm": 0.31026727362843554,
|
| 1730 |
+
"learning_rate": 6.60690223317171e-07,
|
| 1731 |
+
"loss": 0.17535500526428222,
|
| 1732 |
+
"step": 2460
|
| 1733 |
+
},
|
| 1734 |
+
{
|
| 1735 |
+
"epoch": 0.7904,
|
| 1736 |
+
"grad_norm": 0.2855504901999764,
|
| 1737 |
+
"learning_rate": 6.418887705997046e-07,
|
| 1738 |
+
"loss": 0.1787285327911377,
|
| 1739 |
+
"step": 2470
|
| 1740 |
+
},
|
| 1741 |
+
{
|
| 1742 |
+
"epoch": 0.7936,
|
| 1743 |
+
"grad_norm": 0.33581031525319194,
|
| 1744 |
+
"learning_rate": 6.23319236868189e-07,
|
| 1745 |
+
"loss": 0.181508469581604,
|
| 1746 |
+
"step": 2480
|
| 1747 |
+
},
|
| 1748 |
+
{
|
| 1749 |
+
"epoch": 0.7968,
|
| 1750 |
+
"grad_norm": 0.30084134655605693,
|
| 1751 |
+
"learning_rate": 6.049839398673141e-07,
|
| 1752 |
+
"loss": 0.18244649171829225,
|
| 1753 |
+
"step": 2490
|
| 1754 |
+
},
|
| 1755 |
+
{
|
| 1756 |
+
"epoch": 0.8,
|
| 1757 |
+
"grad_norm": 0.3207759323449182,
|
| 1758 |
+
"learning_rate": 5.868851681056567e-07,
|
| 1759 |
+
"loss": 0.18296375274658203,
|
| 1760 |
+
"step": 2500
|
| 1761 |
+
},
|
| 1762 |
+
{
|
| 1763 |
+
"epoch": 0.8032,
|
| 1764 |
+
"grad_norm": 0.3103299858846911,
|
| 1765 |
+
"learning_rate": 5.690251805700467e-07,
|
| 1766 |
+
"loss": 0.18089601993560792,
|
| 1767 |
+
"step": 2510
|
| 1768 |
+
},
|
| 1769 |
+
{
|
| 1770 |
+
"epoch": 0.8064,
|
| 1771 |
+
"grad_norm": 0.3310470653200237,
|
| 1772 |
+
"learning_rate": 5.514062064436096e-07,
|
| 1773 |
+
"loss": 0.1829407334327698,
|
| 1774 |
+
"step": 2520
|
| 1775 |
+
},
|
| 1776 |
+
{
|
| 1777 |
+
"epoch": 0.8096,
|
| 1778 |
+
"grad_norm": 0.31783823046596615,
|
| 1779 |
+
"learning_rate": 5.34030444827533e-07,
|
| 1780 |
+
"loss": 0.17886234521865846,
|
| 1781 |
+
"step": 2530
|
| 1782 |
+
},
|
| 1783 |
+
{
|
| 1784 |
+
"epoch": 0.8128,
|
| 1785 |
+
"grad_norm": 0.3279151171862584,
|
| 1786 |
+
"learning_rate": 5.169000644665895e-07,
|
| 1787 |
+
"loss": 0.17618993520736695,
|
| 1788 |
+
"step": 2540
|
| 1789 |
+
},
|
| 1790 |
+
{
|
| 1791 |
+
"epoch": 0.816,
|
| 1792 |
+
"grad_norm": 0.3006249030100123,
|
| 1793 |
+
"learning_rate": 5.000172034784442e-07,
|
| 1794 |
+
"loss": 0.17779455184936524,
|
| 1795 |
+
"step": 2550
|
| 1796 |
+
},
|
| 1797 |
+
{
|
| 1798 |
+
"epoch": 0.8192,
|
| 1799 |
+
"grad_norm": 0.3164261324675526,
|
| 1800 |
+
"learning_rate": 4.833839690867853e-07,
|
| 1801 |
+
"loss": 0.18002912998199463,
|
| 1802 |
+
"step": 2560
|
| 1803 |
+
},
|
| 1804 |
+
{
|
| 1805 |
+
"epoch": 0.8224,
|
| 1806 |
+
"grad_norm": 0.31374931318878396,
|
| 1807 |
+
"learning_rate": 4.6700243735831705e-07,
|
| 1808 |
+
"loss": 0.173567795753479,
|
| 1809 |
+
"step": 2570
|
| 1810 |
+
},
|
| 1811 |
+
{
|
| 1812 |
+
"epoch": 0.8256,
|
| 1813 |
+
"grad_norm": 0.31170459979916293,
|
| 1814 |
+
"learning_rate": 4.508746529436311e-07,
|
| 1815 |
+
"loss": 0.1724323034286499,
|
| 1816 |
+
"step": 2580
|
| 1817 |
+
},
|
| 1818 |
+
{
|
| 1819 |
+
"epoch": 0.8288,
|
| 1820 |
+
"grad_norm": 0.3080863565290302,
|
| 1821 |
+
"learning_rate": 4.350026288220083e-07,
|
| 1822 |
+
"loss": 0.1794981598854065,
|
| 1823 |
+
"step": 2590
|
| 1824 |
+
},
|
| 1825 |
+
{
|
| 1826 |
+
"epoch": 0.832,
|
| 1827 |
+
"grad_norm": 0.30618951989415283,
|
| 1828 |
+
"learning_rate": 4.1938834605017133e-07,
|
| 1829 |
+
"loss": 0.1761255979537964,
|
| 1830 |
+
"step": 2600
|
| 1831 |
+
},
|
| 1832 |
+
{
|
| 1833 |
+
"epoch": 0.8352,
|
| 1834 |
+
"grad_norm": 0.3029510706797137,
|
| 1835 |
+
"learning_rate": 4.0403375351501515e-07,
|
| 1836 |
+
"loss": 0.17623082399368287,
|
| 1837 |
+
"step": 2610
|
| 1838 |
+
},
|
| 1839 |
+
{
|
| 1840 |
+
"epoch": 0.8384,
|
| 1841 |
+
"grad_norm": 0.336336912959277,
|
| 1842 |
+
"learning_rate": 3.88940767690362e-07,
|
| 1843 |
+
"loss": 0.1757615327835083,
|
| 1844 |
+
"step": 2620
|
| 1845 |
+
},
|
| 1846 |
+
{
|
| 1847 |
+
"epoch": 0.8416,
|
| 1848 |
+
"grad_norm": 0.32859024308656015,
|
| 1849 |
+
"learning_rate": 3.7411127239775774e-07,
|
| 1850 |
+
"loss": 0.17455869913101196,
|
| 1851 |
+
"step": 2630
|
| 1852 |
+
},
|
| 1853 |
+
{
|
| 1854 |
+
"epoch": 0.8448,
|
| 1855 |
+
"grad_norm": 0.3174124959768476,
|
| 1856 |
+
"learning_rate": 3.595471185713431e-07,
|
| 1857 |
+
"loss": 0.17312180995941162,
|
| 1858 |
+
"step": 2640
|
| 1859 |
+
},
|
| 1860 |
+
{
|
| 1861 |
+
"epoch": 0.848,
|
| 1862 |
+
"grad_norm": 0.3247217043719523,
|
| 1863 |
+
"learning_rate": 3.4525012402682826e-07,
|
| 1864 |
+
"loss": 0.17421470880508422,
|
| 1865 |
+
"step": 2650
|
| 1866 |
+
},
|
| 1867 |
+
{
|
| 1868 |
+
"epoch": 0.8512,
|
| 1869 |
+
"grad_norm": 0.3290462164412991,
|
| 1870 |
+
"learning_rate": 3.3122207323460804e-07,
|
| 1871 |
+
"loss": 0.17708632946014405,
|
| 1872 |
+
"step": 2660
|
| 1873 |
+
},
|
| 1874 |
+
{
|
| 1875 |
+
"epoch": 0.8544,
|
| 1876 |
+
"grad_norm": 0.3024938333869805,
|
| 1877 |
+
"learning_rate": 3.1746471709702963e-07,
|
| 1878 |
+
"loss": 0.17333836555480958,
|
| 1879 |
+
"step": 2670
|
| 1880 |
+
},
|
| 1881 |
+
{
|
| 1882 |
+
"epoch": 0.8576,
|
| 1883 |
+
"grad_norm": 0.32678703604131465,
|
| 1884 |
+
"learning_rate": 3.039797727298585e-07,
|
| 1885 |
+
"loss": 0.1801586151123047,
|
| 1886 |
+
"step": 2680
|
| 1887 |
+
},
|
| 1888 |
+
{
|
| 1889 |
+
"epoch": 0.8608,
|
| 1890 |
+
"grad_norm": 0.32985764106850785,
|
| 1891 |
+
"learning_rate": 2.9076892324795546e-07,
|
| 1892 |
+
"loss": 0.17783432006835936,
|
| 1893 |
+
"step": 2690
|
| 1894 |
+
},
|
| 1895 |
+
{
|
| 1896 |
+
"epoch": 0.864,
|
| 1897 |
+
"grad_norm": 0.31242585953952057,
|
| 1898 |
+
"learning_rate": 2.778338175551995e-07,
|
| 1899 |
+
"loss": 0.17357670068740844,
|
| 1900 |
+
"step": 2700
|
| 1901 |
+
},
|
| 1902 |
+
{
|
| 1903 |
+
"epoch": 0.8672,
|
| 1904 |
+
"grad_norm": 0.3220012856306909,
|
| 1905 |
+
"learning_rate": 2.6517607013868326e-07,
|
| 1906 |
+
"loss": 0.18131563663482667,
|
| 1907 |
+
"step": 2710
|
| 1908 |
+
},
|
| 1909 |
+
{
|
| 1910 |
+
"epoch": 0.8704,
|
| 1911 |
+
"grad_norm": 0.33350326064348024,
|
| 1912 |
+
"learning_rate": 2.527972608672002e-07,
|
| 1913 |
+
"loss": 0.17757024765014648,
|
| 1914 |
+
"step": 2720
|
| 1915 |
+
},
|
| 1916 |
+
{
|
| 1917 |
+
"epoch": 0.8736,
|
| 1918 |
+
"grad_norm": 0.335919926946263,
|
| 1919 |
+
"learning_rate": 2.40698934794053e-07,
|
| 1920 |
+
"loss": 0.17683808803558348,
|
| 1921 |
+
"step": 2730
|
| 1922 |
+
},
|
| 1923 |
+
{
|
| 1924 |
+
"epoch": 0.8768,
|
| 1925 |
+
"grad_norm": 0.3209912976041497,
|
| 1926 |
+
"learning_rate": 2.2888260196421237e-07,
|
| 1927 |
+
"loss": 0.17635661363601685,
|
| 1928 |
+
"step": 2740
|
| 1929 |
+
},
|
| 1930 |
+
{
|
| 1931 |
+
"epoch": 0.88,
|
| 1932 |
+
"grad_norm": 0.3165955269677658,
|
| 1933 |
+
"learning_rate": 2.1734973722583735e-07,
|
| 1934 |
+
"loss": 0.17913974523544313,
|
| 1935 |
+
"step": 2750
|
| 1936 |
+
},
|
| 1937 |
+
{
|
| 1938 |
+
"epoch": 0.8832,
|
| 1939 |
+
"grad_norm": 0.31474674596852353,
|
| 1940 |
+
"learning_rate": 2.0610178004619564e-07,
|
| 1941 |
+
"loss": 0.17095563411712647,
|
| 1942 |
+
"step": 2760
|
| 1943 |
+
},
|
| 1944 |
+
{
|
| 1945 |
+
"epoch": 0.8864,
|
| 1946 |
+
"grad_norm": 0.305115903859637,
|
| 1947 |
+
"learning_rate": 1.9514013433199834e-07,
|
| 1948 |
+
"loss": 0.18293533325195313,
|
| 1949 |
+
"step": 2770
|
| 1950 |
+
},
|
| 1951 |
+
{
|
| 1952 |
+
"epoch": 0.8896,
|
| 1953 |
+
"grad_norm": 0.3164297745100823,
|
| 1954 |
+
"learning_rate": 1.8446616825416958e-07,
|
| 1955 |
+
"loss": 0.18138229846954346,
|
| 1956 |
+
"step": 2780
|
| 1957 |
+
},
|
| 1958 |
+
{
|
| 1959 |
+
"epoch": 0.8928,
|
| 1960 |
+
"grad_norm": 0.3526140625065779,
|
| 1961 |
+
"learning_rate": 1.7408121407708007e-07,
|
| 1962 |
+
"loss": 0.18163397312164306,
|
| 1963 |
+
"step": 2790
|
| 1964 |
+
},
|
| 1965 |
+
{
|
| 1966 |
+
"epoch": 0.896,
|
| 1967 |
+
"grad_norm": 0.3224933819196559,
|
| 1968 |
+
"learning_rate": 1.6398656799226253e-07,
|
| 1969 |
+
"loss": 0.1705089807510376,
|
| 1970 |
+
"step": 2800
|
| 1971 |
+
},
|
| 1972 |
+
{
|
| 1973 |
+
"epoch": 0.8992,
|
| 1974 |
+
"grad_norm": 0.31764589677400257,
|
| 1975 |
+
"learning_rate": 1.5418348995662773e-07,
|
| 1976 |
+
"loss": 0.17652597427368164,
|
| 1977 |
+
"step": 2810
|
| 1978 |
+
},
|
| 1979 |
+
{
|
| 1980 |
+
"epoch": 0.9024,
|
| 1981 |
+
"grad_norm": 0.3414067132784035,
|
| 1982 |
+
"learning_rate": 1.4467320353520275e-07,
|
| 1983 |
+
"loss": 0.17487871646881104,
|
| 1984 |
+
"step": 2820
|
| 1985 |
+
},
|
| 1986 |
+
{
|
| 1987 |
+
"epoch": 0.9056,
|
| 1988 |
+
"grad_norm": 0.3138098972679996,
|
| 1989 |
+
"learning_rate": 1.3545689574841341e-07,
|
| 1990 |
+
"loss": 0.17592911720275878,
|
| 1991 |
+
"step": 2830
|
| 1992 |
+
},
|
| 1993 |
+
{
|
| 1994 |
+
"epoch": 0.9088,
|
| 1995 |
+
"grad_norm": 0.31560573280288073,
|
| 1996 |
+
"learning_rate": 1.26535716923927e-07,
|
| 1997 |
+
"loss": 0.18197228908538818,
|
| 1998 |
+
"step": 2840
|
| 1999 |
+
},
|
| 2000 |
+
{
|
| 2001 |
+
"epoch": 0.912,
|
| 2002 |
+
"grad_norm": 0.3188962184685744,
|
| 2003 |
+
"learning_rate": 1.1791078055307493e-07,
|
| 2004 |
+
"loss": 0.1777464509010315,
|
| 2005 |
+
"step": 2850
|
| 2006 |
+
},
|
| 2007 |
+
{
|
| 2008 |
+
"epoch": 0.9152,
|
| 2009 |
+
"grad_norm": 0.31575220367713525,
|
| 2010 |
+
"learning_rate": 1.0958316315187289e-07,
|
| 2011 |
+
"loss": 0.17706483602523804,
|
| 2012 |
+
"step": 2860
|
| 2013 |
+
},
|
| 2014 |
+
{
|
| 2015 |
+
"epoch": 0.9184,
|
| 2016 |
+
"grad_norm": 0.3131837624055497,
|
| 2017 |
+
"learning_rate": 1.0155390412665528e-07,
|
| 2018 |
+
"loss": 0.17496002912521363,
|
| 2019 |
+
"step": 2870
|
| 2020 |
+
},
|
| 2021 |
+
{
|
| 2022 |
+
"epoch": 0.9216,
|
| 2023 |
+
"grad_norm": 0.32248583567737266,
|
| 2024 |
+
"learning_rate": 9.38240056443443e-08,
|
| 2025 |
+
"loss": 0.17229046821594238,
|
| 2026 |
+
"step": 2880
|
| 2027 |
+
},
|
| 2028 |
+
{
|
| 2029 |
+
"epoch": 0.9248,
|
| 2030 |
+
"grad_norm": 0.3101253584845484,
|
| 2031 |
+
"learning_rate": 8.639443250736402e-08,
|
| 2032 |
+
"loss": 0.17552309036254882,
|
| 2033 |
+
"step": 2890
|
| 2034 |
+
},
|
| 2035 |
+
{
|
| 2036 |
+
"epoch": 0.928,
|
| 2037 |
+
"grad_norm": 0.33217431742972764,
|
| 2038 |
+
"learning_rate": 7.926611203321777e-08,
|
| 2039 |
+
"loss": 0.17659810781478882,
|
| 2040 |
+
"step": 2900
|
| 2041 |
+
},
|
| 2042 |
+
{
|
| 2043 |
+
"epoch": 0.9312,
|
| 2044 |
+
"grad_norm": 0.33918124282098266,
|
| 2045 |
+
"learning_rate": 7.243993393874882e-08,
|
| 2046 |
+
"loss": 0.17737939357757568,
|
| 2047 |
+
"step": 2910
|
| 2048 |
+
},
|
| 2049 |
+
{
|
| 2050 |
+
"epoch": 0.9344,
|
| 2051 |
+
"grad_norm": 0.31351790893613213,
|
| 2052 |
+
"learning_rate": 6.591675022908805e-08,
|
| 2053 |
+
"loss": 0.1745692253112793,
|
| 2054 |
+
"step": 2920
|
| 2055 |
+
},
|
| 2056 |
+
{
|
| 2057 |
+
"epoch": 0.9376,
|
| 2058 |
+
"grad_norm": 0.33783778867129854,
|
| 2059 |
+
"learning_rate": 5.969737509131241e-08,
|
| 2060 |
+
"loss": 0.1722058415412903,
|
| 2061 |
+
"step": 2930
|
| 2062 |
+
},
|
| 2063 |
+
{
|
| 2064 |
+
"epoch": 0.9408,
|
| 2065 |
+
"grad_norm": 0.308776655874055,
|
| 2066 |
+
"learning_rate": 5.3782584792823334e-08,
|
| 2067 |
+
"loss": 0.17710112333297728,
|
| 2068 |
+
"step": 2940
|
| 2069 |
+
},
|
| 2070 |
+
{
|
| 2071 |
+
"epoch": 0.944,
|
| 2072 |
+
"grad_norm": 0.3142338038371378,
|
| 2073 |
+
"learning_rate": 4.817311758445686e-08,
|
| 2074 |
+
"loss": 0.178252911567688,
|
| 2075 |
+
"step": 2950
|
| 2076 |
+
},
|
| 2077 |
+
{
|
| 2078 |
+
"epoch": 0.9472,
|
| 2079 |
+
"grad_norm": 0.33048986218580767,
|
| 2080 |
+
"learning_rate": 4.286967360833866e-08,
|
| 2081 |
+
"loss": 0.1782402753829956,
|
| 2082 |
+
"step": 2960
|
| 2083 |
+
},
|
| 2084 |
+
{
|
| 2085 |
+
"epoch": 0.9504,
|
| 2086 |
+
"grad_norm": 0.3110909627270251,
|
| 2087 |
+
"learning_rate": 3.787291481049754e-08,
|
| 2088 |
+
"loss": 0.17829475402832032,
|
| 2089 |
+
"step": 2970
|
| 2090 |
+
},
|
| 2091 |
+
{
|
| 2092 |
+
"epoch": 0.9536,
|
| 2093 |
+
"grad_norm": 0.33726065147122686,
|
| 2094 |
+
"learning_rate": 3.3183464858244364e-08,
|
| 2095 |
+
"loss": 0.18406097888946532,
|
| 2096 |
+
"step": 2980
|
| 2097 |
+
},
|
| 2098 |
+
{
|
| 2099 |
+
"epoch": 0.9568,
|
| 2100 |
+
"grad_norm": 0.3326393750086487,
|
| 2101 |
+
"learning_rate": 2.8801909062328992e-08,
|
| 2102 |
+
"loss": 0.17060396671295167,
|
| 2103 |
+
"step": 2990
|
| 2104 |
+
},
|
| 2105 |
+
{
|
| 2106 |
+
"epoch": 0.96,
|
| 2107 |
+
"grad_norm": 0.32948960265922206,
|
| 2108 |
+
"learning_rate": 2.4728794303886248e-08,
|
| 2109 |
+
"loss": 0.16899311542510986,
|
| 2110 |
+
"step": 3000
|
| 2111 |
+
},
|
| 2112 |
+
{
|
| 2113 |
+
"epoch": 0.9632,
|
| 2114 |
+
"grad_norm": 0.33211982053439487,
|
| 2115 |
+
"learning_rate": 2.0964628966175794e-08,
|
| 2116 |
+
"loss": 0.17517964839935302,
|
| 2117 |
+
"step": 3010
|
| 2118 |
+
},
|
| 2119 |
+
{
|
| 2120 |
+
"epoch": 0.9664,
|
| 2121 |
+
"grad_norm": 0.30613498697830943,
|
| 2122 |
+
"learning_rate": 1.750988287113009e-08,
|
| 2123 |
+
"loss": 0.17458994388580323,
|
| 2124 |
+
"step": 3020
|
| 2125 |
+
},
|
| 2126 |
+
{
|
| 2127 |
+
"epoch": 0.9696,
|
| 2128 |
+
"grad_norm": 0.3027770955918648,
|
| 2129 |
+
"learning_rate": 1.4364987220713278e-08,
|
| 2130 |
+
"loss": 0.18178436756134034,
|
| 2131 |
+
"step": 3030
|
| 2132 |
+
},
|
| 2133 |
+
{
|
| 2134 |
+
"epoch": 0.9728,
|
| 2135 |
+
"grad_norm": 0.3292318037983906,
|
| 2136 |
+
"learning_rate": 1.1530334543099763e-08,
|
| 2137 |
+
"loss": 0.1790144681930542,
|
| 2138 |
+
"step": 3040
|
| 2139 |
+
},
|
| 2140 |
+
{
|
| 2141 |
+
"epoch": 0.976,
|
| 2142 |
+
"grad_norm": 0.33300755292787143,
|
| 2143 |
+
"learning_rate": 9.006278643683697e-09,
|
| 2144 |
+
"loss": 0.1808505654335022,
|
| 2145 |
+
"step": 3050
|
| 2146 |
+
},
|
| 2147 |
+
{
|
| 2148 |
+
"epoch": 0.9792,
|
| 2149 |
+
"grad_norm": 0.32631723332989787,
|
| 2150 |
+
"learning_rate": 6.793134560916514e-09,
|
| 2151 |
+
"loss": 0.18275127410888672,
|
| 2152 |
+
"step": 3060
|
| 2153 |
+
},
|
| 2154 |
+
{
|
| 2155 |
+
"epoch": 0.9824,
|
| 2156 |
+
"grad_norm": 0.3082787331662993,
|
| 2157 |
+
"learning_rate": 4.891178526986451e-09,
|
| 2158 |
+
"loss": 0.1783647656440735,
|
| 2159 |
+
"step": 3070
|
| 2160 |
+
},
|
| 2161 |
+
{
|
| 2162 |
+
"epoch": 0.9856,
|
| 2163 |
+
"grad_norm": 0.32341550392390483,
|
| 2164 |
+
"learning_rate": 3.3006479333413943e-09,
|
| 2165 |
+
"loss": 0.18126009702682494,
|
| 2166 |
+
"step": 3080
|
| 2167 |
+
},
|
| 2168 |
+
{
|
| 2169 |
+
"epoch": 0.9888,
|
| 2170 |
+
"grad_norm": 0.30931371888762194,
|
| 2171 |
+
"learning_rate": 2.021741301058422e-09,
|
| 2172 |
+
"loss": 0.17681236267089845,
|
| 2173 |
+
"step": 3090
|
| 2174 |
+
},
|
| 2175 |
+
{
|
| 2176 |
+
"epoch": 0.992,
|
| 2177 |
+
"grad_norm": 0.3419672311636941,
|
| 2178 |
+
"learning_rate": 1.0546182560652872e-09,
|
| 2179 |
+
"loss": 0.17989683151245117,
|
| 2180 |
+
"step": 3100
|
| 2181 |
+
},
|
| 2182 |
+
{
|
| 2183 |
+
"epoch": 0.9952,
|
| 2184 |
+
"grad_norm": 0.3111951639834393,
|
| 2185 |
+
"learning_rate": 3.9939950921774607e-10,
|
| 2186 |
+
"loss": 0.17482796907424927,
|
| 2187 |
+
"step": 3110
|
| 2188 |
+
},
|
| 2189 |
+
{
|
| 2190 |
+
"epoch": 0.9984,
|
| 2191 |
+
"grad_norm": 0.3230413672933209,
|
| 2192 |
+
"learning_rate": 5.616684123160854e-11,
|
| 2193 |
+
"loss": 0.17436976432800294,
|
| 2194 |
+
"step": 3120
|
| 2195 |
+
},
|
| 2196 |
+
{
|
| 2197 |
+
"epoch": 1.0,
|
| 2198 |
+
"step": 3125,
|
| 2199 |
+
"total_flos": 1955525886476288.0,
|
| 2200 |
+
"train_loss": 0.1948647116279602,
|
| 2201 |
+
"train_runtime": 28413.61,
|
| 2202 |
+
"train_samples_per_second": 7.039,
|
| 2203 |
+
"train_steps_per_second": 0.11
|
| 2204 |
+
}
|
| 2205 |
+
],
|
| 2206 |
+
"logging_steps": 10,
|
| 2207 |
+
"max_steps": 3125,
|
| 2208 |
+
"num_input_tokens_seen": 0,
|
| 2209 |
+
"num_train_epochs": 1,
|
| 2210 |
+
"save_steps": 500,
|
| 2211 |
+
"stateful_callbacks": {
|
| 2212 |
+
"TrainerControl": {
|
| 2213 |
+
"args": {
|
| 2214 |
+
"should_epoch_stop": false,
|
| 2215 |
+
"should_evaluate": false,
|
| 2216 |
+
"should_log": false,
|
| 2217 |
+
"should_save": true,
|
| 2218 |
+
"should_training_stop": true
|
| 2219 |
+
},
|
| 2220 |
+
"attributes": {}
|
| 2221 |
+
}
|
| 2222 |
+
},
|
| 2223 |
+
"total_flos": 1955525886476288.0,
|
| 2224 |
+
"train_batch_size": 4,
|
| 2225 |
+
"trial_name": null,
|
| 2226 |
+
"trial_params": null
|
| 2227 |
+
}
|
checkpoints/InternVL3.5-8B-SFT/training_loss.png
ADDED
checkpoints/Qwen3-VL-2B-SFT/all_results.json
ADDED
@@ -0,0 +1,8 @@
{
  "epoch": 1.0,
  "total_flos": 1201860236279808.0,
  "train_loss": 0.2128027264213562,
  "train_runtime": 15463.9635,
  "train_samples_per_second": 12.933,
  "train_steps_per_second": 0.202
}
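The run-level numbers above pin down the effective training setup. A minimal back-of-the-envelope check, using only values from this file plus the `"train_batch_size": 4` recorded in the accompanying trainer_state.json (this is a sanity check, not a script shipped with the release):

```python
# Values copied from all_results.json / trainer_state.json.
train_runtime = 15463.9635            # seconds
samples_per_second = 12.933
global_steps = 3125                   # exactly one epoch
per_device_batch = 4                  # trainer_state.json: "train_batch_size"

total_samples = train_runtime * samples_per_second   # ~199,995 samples
effective_batch = total_samples / global_steps       # ~64 samples per optimizer step
# effective_batch / per_device_batch ~ 16, i.e. gradient accumulation times
# data-parallel degree multiplies the per-device batch of 4 up to ~64.
print(round(total_samples), round(effective_batch))  # 199995 64
```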
checkpoints/Qwen3-VL-2B-SFT/chat_template.jinja
ADDED
@@ -0,0 +1,120 @@
{%- if tools %}
    {{- '<|im_start|>system\n' }}
    {%- if messages[0].role == 'system' %}
        {%- if messages[0].content is string %}
            {{- messages[0].content }}
        {%- else %}
            {%- for content in messages[0].content %}
                {%- if 'text' in content %}
                    {{- content.text }}
                {%- endif %}
            {%- endfor %}
        {%- endif %}
        {{- '\n\n' }}
    {%- endif %}
    {{- "# Tools\n\nYou may call one or more functions to assist with the user query.\n\nYou are provided with function signatures within <tools></tools> XML tags:\n<tools>" }}
    {%- for tool in tools %}
        {{- "\n" }}
        {{- tool | tojson }}
    {%- endfor %}
    {{- "\n</tools>\n\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\n<tool_call>\n{\"name\": <function-name>, \"arguments\": <args-json-object>}\n</tool_call><|im_end|>\n" }}
{%- else %}
    {%- if messages[0].role == 'system' %}
        {{- '<|im_start|>system\n' }}
        {%- if messages[0].content is string %}
            {{- messages[0].content }}
        {%- else %}
            {%- for content in messages[0].content %}
                {%- if 'text' in content %}
                    {{- content.text }}
                {%- endif %}
            {%- endfor %}
        {%- endif %}
        {{- '<|im_end|>\n' }}
    {%- endif %}
{%- endif %}
{%- set image_count = namespace(value=0) %}
{%- set video_count = namespace(value=0) %}
{%- for message in messages %}
    {%- if message.role == "user" %}
        {{- '<|im_start|>' + message.role + '\n' }}
        {%- if message.content is string %}
            {{- message.content }}
        {%- else %}
            {%- for content in message.content %}
                {%- if content.type == 'image' or 'image' in content or 'image_url' in content %}
                    {%- set image_count.value = image_count.value + 1 %}
                    {%- if add_vision_id %}Picture {{ image_count.value }}: {% endif -%}
                    <|vision_start|><|image_pad|><|vision_end|>
                {%- elif content.type == 'video' or 'video' in content %}
                    {%- set video_count.value = video_count.value + 1 %}
                    {%- if add_vision_id %}Video {{ video_count.value }}: {% endif -%}
                    <|vision_start|><|video_pad|><|vision_end|>
                {%- elif 'text' in content %}
                    {{- content.text }}
                {%- endif %}
            {%- endfor %}
        {%- endif %}
        {{- '<|im_end|>\n' }}
    {%- elif message.role == "assistant" %}
        {{- '<|im_start|>' + message.role + '\n' }}
        {%- if message.content is string %}
            {{- message.content }}
        {%- else %}
            {%- for content_item in message.content %}
                {%- if 'text' in content_item %}
                    {{- content_item.text }}
                {%- endif %}
            {%- endfor %}
        {%- endif %}
        {%- if message.tool_calls %}
            {%- for tool_call in message.tool_calls %}
                {%- if (loop.first and message.content) or (not loop.first) %}
                    {{- '\n' }}
                {%- endif %}
                {%- if tool_call.function %}
                    {%- set tool_call = tool_call.function %}
                {%- endif %}
                {{- '<tool_call>\n{"name": "' }}
                {{- tool_call.name }}
                {{- '", "arguments": ' }}
                {%- if tool_call.arguments is string %}
                    {{- tool_call.arguments }}
                {%- else %}
                    {{- tool_call.arguments | tojson }}
                {%- endif %}
                {{- '}\n</tool_call>' }}
            {%- endfor %}
        {%- endif %}
        {{- '<|im_end|>\n' }}
    {%- elif message.role == "tool" %}
        {%- if loop.first or (messages[loop.index0 - 1].role != "tool") %}
            {{- '<|im_start|>user' }}
        {%- endif %}
        {{- '\n<tool_response>\n' }}
        {%- if message.content is string %}
            {{- message.content }}
        {%- else %}
            {%- for content in message.content %}
                {%- if content.type == 'image' or 'image' in content or 'image_url' in content %}
                    {%- set image_count.value = image_count.value + 1 %}
                    {%- if add_vision_id %}Picture {{ image_count.value }}: {% endif -%}
                    <|vision_start|><|image_pad|><|vision_end|>
                {%- elif content.type == 'video' or 'video' in content %}
                    {%- set video_count.value = video_count.value + 1 %}
                    {%- if add_vision_id %}Video {{ video_count.value }}: {% endif -%}
                    <|vision_start|><|video_pad|><|vision_end|>
                {%- elif 'text' in content %}
                    {{- content.text }}
                {%- endif %}
            {%- endfor %}
        {%- endif %}
        {{- '\n</tool_response>' }}
        {%- if loop.last or (messages[loop.index0 + 1].role != "tool") %}
            {{- '<|im_end|>\n' }}
        {%- endif %}
    {%- endif %}
{%- endfor %}
{%- if add_generation_prompt %}
    {{- '<|im_start|>assistant\n' }}
{%- endif %}
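This is the stock Qwen3-VL ChatML-style template: each image or video in a user turn is counted and replaced by a `<|vision_start|><|image_pad|><|vision_end|>` placeholder that the processor later expands into visual tokens. A minimal sketch of exercising it through `transformers` (the checkpoint path refers to the folder added in this PR; the image path is a stand-in):

```python
from transformers import AutoProcessor

processor = AutoProcessor.from_pretrained("checkpoints/Qwen3-VL-2B-SFT")

messages = [
    {"role": "system", "content": "You are a UAV trajectory planner."},
    {"role": "user", "content": [
        {"type": "image", "image": "frame_000.jpg"},   # placeholder image path
        {"type": "text", "text": "Predict the next 5 waypoints."},
    ]},
]

# Renders the template above into a ChatML string ending with
# "<|im_start|>assistant\n" so the model continues as the assistant.
prompt = processor.apply_chat_template(
    messages, tokenize=False, add_generation_prompt=True
)
print(prompt)
```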
checkpoints/Qwen3-VL-2B-SFT/config.json
ADDED
@@ -0,0 +1,71 @@
{
  "architectures": [
    "Qwen3VLForConditionalGeneration"
  ],
  "bos_token_id": null,
  "dtype": "bfloat16",
  "eos_token_id": 151645,
  "hidden_size": 2048,
  "image_token_id": 151655,
  "model_type": "qwen3_vl",
  "pad_token_id": 151643,
  "text_config": {
    "attention_bias": false,
    "attention_dropout": 0.0,
    "bos_token_id": 151643,
    "dtype": "bfloat16",
    "eos_token_id": 151645,
    "head_dim": 128,
    "hidden_act": "silu",
    "hidden_size": 2048,
    "initializer_range": 0.02,
    "intermediate_size": 6144,
    "max_position_embeddings": 262144,
    "model_type": "qwen3_vl_text",
    "num_attention_heads": 16,
    "num_hidden_layers": 28,
    "num_key_value_heads": 8,
    "pad_token_id": null,
    "rms_norm_eps": 1e-06,
    "rope_parameters": {
      "mrope_interleaved": true,
      "mrope_section": [24, 20, 20],
      "rope_theta": 5000000,
      "rope_type": "default"
    },
    "tie_word_embeddings": true,
    "use_cache": false,
    "vocab_size": 151936
  },
  "tie_word_embeddings": true,
  "transformers_version": "5.5.3",
  "use_cache": false,
  "video_token_id": 151656,
  "vision_config": {
    "deepstack_visual_indexes": [5, 11, 17],
    "depth": 24,
    "dtype": "bfloat16",
    "hidden_act": "gelu_pytorch_tanh",
    "hidden_size": 1024,
    "in_channels": 3,
    "initializer_range": 0.02,
    "intermediate_size": 4096,
    "model_type": "qwen3_vl",
    "num_heads": 16,
    "num_position_embeddings": 2304,
    "out_hidden_size": 2048,
    "patch_size": 16,
    "spatial_merge_size": 2,
    "temporal_patch_size": 2
  },
  "vision_end_token_id": 151653,
  "vision_start_token_id": 151652
}
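With `patch_size: 16` and `spatial_merge_size: 2`, the number of visual tokens a frame contributes follows directly from its resolution: the ViT cuts the image into 16x16 patches and the merger folds each 2x2 patch group into one LLM-side token. A rough token-count helper under exactly those assumptions (not code from this repo):

```python
def visual_tokens(height: int, width: int,
                  patch_size: int = 16, merge_size: int = 2) -> int:
    """Approximate LLM-side token count for one frame, per the vision_config
    above (assumes height/width are multiples of patch_size * merge_size)."""
    grid_h = height // patch_size        # 16x16 ViT patches
    grid_w = width // patch_size
    return (grid_h // merge_size) * (grid_w // merge_size)  # 2x2 merge -> 1 token

print(visual_tokens(448, 448))   # 196 tokens
print(visual_tokens(896, 1344))  # 1176 tokens
```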
checkpoints/Qwen3-VL-2B-SFT/eval_results_job_qwen3vl_2b_qwen3_vl_2b_20260430_002232.json
ADDED
@@ -0,0 +1,56 @@
{
  "mae_dx": 0.1790637931034483,
  "rmse_dx": 0.5420201834898262,
  "mae_dy": 0.16369482758620688,
  "rmse_dy": 0.4466289248937244,
  "mae_dz": 0.017312068965517242,
  "rmse_dz": 0.11626166978875267,
  "mae_dpitch": 0.35010344827586204,
  "rmse_dpitch": 0.7528209935143029,
  "mae_dyaw": 1.3350413793103448,
  "rmse_dyaw": 2.917615133797725,
  "mae_droll": 0.0,
  "rmse_droll": 0.0,
  "mae_overall": 0.3408692528735632,
  "mae_position": 0.12002356321839079,
  "mae_rotation": 0.5617149425287357,
  "rmse_overall": 1.263988236295834,
  "wp1_euc_mae": 0.08209225193702147,
  "wp1_euc_median": 0.022360679774997918,
  "wp2_euc_mae": 0.1592417265186995,
  "wp2_euc_median": 0.058309518948453015,
  "wp3_euc_mae": 0.2573809864066125,
  "wp3_euc_median": 0.10049875621120885,
  "wp4_euc_mae": 0.3827385749455985,
  "wp4_euc_median": 0.1529705854077837,
  "wp5_euc_mae": 0.5205297307973544,
  "wp5_euc_median": 0.2197724920005007,
  "euclidean_mae": 0.2803966541210573,
  "ADE": 0.2803966541210573,
  "FDE": 0.5205297307973544,
  "ADE_median": 0.11621310610654717,
  "FDE_median": 0.2197724920005007,
  "SR@0.5m": 0.8663793103448276,
  "SR@1.0m": 0.9436206896551724,
  "SR@2.0m": 0.98,
  "SR@5.0m": 0.9955172413793103,
  "TrajSR@1.0m": 0.8732758620689656,
  "TrajSR@2.0m": 0.9517241379310345,
  "TrajSR@5.0m": 0.9887931034482759,
  "RotAcc@1.0deg": 0.608103448275862,
  "RotAcc@5.0deg": 0.9405172413793104,
  "RotAcc@10.0deg": 0.9853448275862069,
  "wp1_rot_mae": 0.7521227362111129,
  "wp2_rot_mae": 1.0375870328696497,
  "wp3_rot_mae": 1.394596726377232,
  "wp4_rot_mae": 1.8037210071006413,
  "wp5_rot_mae": 2.2606874444077585,
  "rotation_euc_mae": 1.4497429893932787,
  "parse_failure_rate": 0.0,
  "parse_success_rate": 1.0,
  "valid_samples": 1160,
  "total_samples": 1160,
  "parse_failures": 0,
  "inference_engine": "vllm",
  "vllm_version": "0.19.0"
}
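For reference, ADE/FDE and the SR@ thresholds above carry their usual trajectory-forecasting meanings: average vs. final Euclidean error over the 5 predicted waypoints, and the fraction of samples whose final-waypoint error lands under a threshold (TrajSR@ requiring every waypoint under it). A small sketch of recomputing them from raw predictions; the array shapes are an assumption, not this repo's actual eval code:

```python
import numpy as np

def waypoint_metrics(pred: np.ndarray, gt: np.ndarray):
    """pred, gt: (N, 5, 3) arrays of 5 predicted/ground-truth 3-D waypoints.
    Mirrors the assumed ADE/FDE/SR@ definitions behind the eval file above."""
    err = np.linalg.norm(pred - gt, axis=-1)   # (N, 5) per-waypoint Euclidean error
    ade = err.mean()                           # ADE: mean over waypoints and samples
    fde = err[:, -1].mean()                    # FDE: mean error at the last waypoint
    sr = {f"SR@{t}m": (err[:, -1] < t).mean() for t in (0.5, 1.0, 2.0, 5.0)}
    traj_sr = {f"TrajSR@{t}m": (err.max(axis=1) < t).mean() for t in (1.0, 2.0, 5.0)}
    return ade, fde, sr, traj_sr
```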
checkpoints/Qwen3-VL-2B-SFT/generation_config.json
ADDED
@@ -0,0 +1,14 @@
{
  "do_sample": true,
  "eos_token_id": [
    151645,
    151645,
    151643
  ],
  "pad_token_id": 151643,
  "repetition_penalty": 1.0,
  "temperature": 0.7,
  "top_k": 20,
  "top_p": 0.8,
  "transformers_version": "5.5.3"
}
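The eval file above reports `inference_engine: vllm`, and these sampling settings map one-to-one onto vLLM's `SamplingParams`. A minimal sketch of reproducing them (the `max_tokens` value and the bare prompt string are assumptions):

```python
from vllm import LLM, SamplingParams

# Mirror generation_config.json; max_tokens is an assumed value.
params = SamplingParams(
    temperature=0.7,
    top_p=0.8,
    top_k=20,
    repetition_penalty=1.0,
    max_tokens=256,
)

llm = LLM(model="checkpoints/Qwen3-VL-2B-SFT")  # local checkpoint from this PR
outputs = llm.generate(
    ["<|im_start|>user\nHello<|im_end|>\n<|im_start|>assistant\n"], params
)
print(outputs[0].outputs[0].text)
```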
checkpoints/Qwen3-VL-2B-SFT/model.safetensors
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:0f00f124b4c3943edcd2411fc8bac2af6a9fd0b4b769e7a55996571189440f85
size 4255140312
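The weights themselves go through Git LFS; the three lines above are only the pointer. If a download looks suspect, the pointer's oid can be checked against the fetched file with a generic stream hash (a verification sketch, not part of the release):

```python
import hashlib

def lfs_sha256(path: str, chunk: int = 1 << 20) -> str:
    """Stream-hash a large file for comparison with the LFS pointer's oid."""
    h = hashlib.sha256()
    with open(path, "rb") as f:
        while block := f.read(chunk):
            h.update(block)
    return h.hexdigest()

assert lfs_sha256("checkpoints/Qwen3-VL-2B-SFT/model.safetensors") == \
    "0f00f124b4c3943edcd2411fc8bac2af6a9fd0b4b769e7a55996571189440f85"
```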
checkpoints/Qwen3-VL-2B-SFT/processor_config.json
ADDED
@@ -0,0 +1,60 @@
{
  "image_processor": {
    "do_convert_rgb": true,
    "do_normalize": true,
    "do_rescale": true,
    "do_resize": true,
    "image_mean": [0.5, 0.5, 0.5],
    "image_processor_type": "Qwen2VLImageProcessor",
    "image_std": [0.5, 0.5, 0.5],
    "merge_size": 2,
    "patch_size": 16,
    "resample": 3,
    "rescale_factor": 0.00392156862745098,
    "size": {
      "longest_edge": 16777216,
      "shortest_edge": 65536
    },
    "temporal_patch_size": 2
  },
  "processor_class": "Qwen3VLProcessor",
  "video_processor": {
    "do_convert_rgb": true,
    "do_normalize": true,
    "do_rescale": true,
    "do_resize": true,
    "do_sample_frames": true,
    "fps": 2,
    "image_mean": [0.5, 0.5, 0.5],
    "image_std": [0.5, 0.5, 0.5],
    "max_frames": 768,
    "merge_size": 2,
    "min_frames": 4,
    "patch_size": 16,
    "resample": 3,
    "rescale_factor": 0.00392156862745098,
    "return_metadata": false,
    "size": {
      "longest_edge": 25165824,
      "shortest_edge": 4096
    },
    "temporal_patch_size": 2,
    "video_processor_type": "Qwen3VLVideoProcessor"
  }
}
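Note that in these Qwen-VL processors the `shortest_edge`/`longest_edge` values act as pixel-area budgets (65,536 = 256x256 up to 16,777,216 = 4096x4096 pixels), not edge lengths: each image is rescaled so its total pixel count lands inside that range while keeping both sides divisible by `patch_size * merge_size` = 32. A sketch of that resizing rule, under those assumptions:

```python
import math

def budgeted_size(h: int, w: int, min_pixels: int = 65536,
                  max_pixels: int = 16777216, factor: int = 32):
    """Scale (h, w) into the [min_pixels, max_pixels] area budget, rounding
    both sides to multiples of `factor` (sketch of the assumed resize rule)."""
    area = h * w
    if area > max_pixels:
        scale = math.sqrt(max_pixels / area)
    elif area < min_pixels:
        scale = math.sqrt(min_pixels / area)
    else:
        scale = 1.0
    round_to = lambda x: max(factor, int(round(x / factor)) * factor)
    return round_to(h * scale), round_to(w * scale)

print(budgeted_size(1080, 1920))  # already within budget -> (1088, 1920)
```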
checkpoints/Qwen3-VL-2B-SFT/tokenizer.json
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:be75606093db2094d7cd20f3c2f385c212750648bd6ea4fb2bf507a6a4c55506
size 11422650
checkpoints/Qwen3-VL-2B-SFT/tokenizer_config.json
ADDED
@@ -0,0 +1,31 @@
{
  "add_prefix_space": false,
  "backend": "tokenizers",
  "bos_token": null,
  "clean_up_tokenization_spaces": false,
  "eos_token": "<|im_end|>",
  "errors": "replace",
  "extra_special_tokens": [
    "<|im_start|>",
    "<|im_end|>",
    "<|object_ref_start|>",
    "<|object_ref_end|>",
    "<|box_start|>",
    "<|box_end|>",
    "<|quad_start|>",
    "<|quad_end|>",
    "<|vision_start|>",
    "<|vision_end|>",
    "<|vision_pad|>",
    "<|image_pad|>",
    "<|video_pad|>"
  ],
  "is_local": true,
  "model_max_length": 262144,
  "pad_token": "<|endoftext|>",
  "padding_side": "right",
  "processor_class": "Qwen3VLProcessor",
  "split_special_tokens": false,
  "tokenizer_class": "Qwen2Tokenizer",
  "unk_token": null
}
checkpoints/Qwen3-VL-2B-SFT/train_results.json
ADDED
@@ -0,0 +1,8 @@
{
  "epoch": 1.0,
  "total_flos": 1201860236279808.0,
  "train_loss": 0.2128027264213562,
  "train_runtime": 15463.9635,
  "train_samples_per_second": 12.933,
  "train_steps_per_second": 0.202
}
checkpoints/Qwen3-VL-2B-SFT/trainer_state.json
ADDED
@@ -0,0 +1,2227 @@
{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 1.0,
  "eval_steps": 500,
  "global_step": 3125,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    { "epoch": 0.0032, "grad_norm": 16.167704755253098, "learning_rate": 1.437699680511182e-07, "loss": 0.6528051853179931, "step": 10 },
    { "epoch": 0.0064, "grad_norm": 15.890120546753822, "learning_rate": 3.0351437699680514e-07, "loss": 0.6462714195251464, "step": 20 },
    { "epoch": 0.0096, "grad_norm": 14.94996510180698, "learning_rate": 4.6325878594249205e-07, "loss": 0.6038930416107178, "step": 30 },
    { "epoch": 0.0128, "grad_norm": 7.595956825837255, "learning_rate": 6.230031948881789e-07, "loss": 0.49077792167663575, "step": 40 },
    { "epoch": 0.016, "grad_norm": 3.026643067758099, "learning_rate": 7.82747603833866e-07, "loss": 0.3725566864013672, "step": 50 },
    { "epoch": 0.0192, "grad_norm": 1.45050871801394, "learning_rate": 9.424920127795528e-07, "loss": 0.3130798816680908, "step": 60 },
    { "epoch": 0.0224, "grad_norm": 0.7098603642718405, "learning_rate": 1.1022364217252397e-06, "loss": 0.29621334075927735, "step": 70 },
    { "epoch": 0.0256, "grad_norm": 0.6027577608327673, "learning_rate": 1.2619808306709266e-06, "loss": 0.27455599308013917, "step": 80 },
    { "epoch": 0.0288, "grad_norm": 0.6521596145147045, "learning_rate": 1.4217252396166134e-06, "loss": 0.2667043447494507, "step": 90 },
    { "epoch": 0.032, "grad_norm": 0.5069890685833461, "learning_rate": 1.5814696485623005e-06, "loss": 0.26807360649108886, "step": 100 },
    { "epoch": 0.0352, "grad_norm": 0.5470393023746721, "learning_rate": 1.7412140575079875e-06, "loss": 0.26680865287780764, "step": 110 },
    { "epoch": 0.0384, "grad_norm": 0.5543553869620175, "learning_rate": 1.9009584664536742e-06, "loss": 0.25434055328369143, "step": 120 },
    { "epoch": 0.0416, "grad_norm": 0.5420531484574165, "learning_rate": 2.060702875399361e-06, "loss": 0.25767529010772705, "step": 130 },
    { "epoch": 0.0448, "grad_norm": 0.645702037816744, "learning_rate": 2.220447284345048e-06, "loss": 0.24863953590393068, "step": 140 },
    { "epoch": 0.048, "grad_norm": 0.6143136416629473, "learning_rate": 2.380191693290735e-06, "loss": 0.24553947448730468, "step": 150 },
    { "epoch": 0.0512, "grad_norm": 0.5094817219127052, "learning_rate": 2.539936102236422e-06, "loss": 0.2415369987487793, "step": 160 },
    { "epoch": 0.0544, "grad_norm": 0.6291606522275387, "learning_rate": 2.699680511182109e-06, "loss": 0.24887418746948242, "step": 170 },
    { "epoch": 0.0576, "grad_norm": 0.6248895072998087, "learning_rate": 2.8594249201277955e-06, "loss": 0.2414403438568115, "step": 180 },
    { "epoch": 0.0608, "grad_norm": 0.6640745861299296, "learning_rate": 3.0191693290734825e-06, "loss": 0.24553894996643066, "step": 190 },
    { "epoch": 0.064, "grad_norm": 0.6136916428260776, "learning_rate": 3.17891373801917e-06, "loss": 0.24655485153198242, "step": 200 },
    { "epoch": 0.0672, "grad_norm": 0.6572881584027297, "learning_rate": 3.3386581469648564e-06, "loss": 0.2433255910873413, "step": 210 },
    { "epoch": 0.0704, "grad_norm": 0.6365580690264084, "learning_rate": 3.4984025559105434e-06, "loss": 0.23687341213226318, "step": 220 },
    { "epoch": 0.0736, "grad_norm": 0.6771736107097397, "learning_rate": 3.6581469648562303e-06, "loss": 0.23829469680786133, "step": 230 },
    { "epoch": 0.0768, "grad_norm": 0.6990706788858505, "learning_rate": 3.817891373801918e-06, "loss": 0.23471264839172362, "step": 240 },
    { "epoch": 0.08, "grad_norm": 0.6029376877872676, "learning_rate": 3.977635782747604e-06, "loss": 0.23215394020080565, "step": 250 },
    { "epoch": 0.0832, "grad_norm": 0.6082124769869354, "learning_rate": 4.137380191693291e-06, "loss": 0.2326298713684082, "step": 260 },
    { "epoch": 0.0864, "grad_norm": 0.7069824323872274, "learning_rate": 4.297124600638978e-06, "loss": 0.23525137901306153, "step": 270 },
    { "epoch": 0.0896, "grad_norm": 0.6697633994539672, "learning_rate": 4.456869009584665e-06, "loss": 0.23122966289520264, "step": 280 },
    { "epoch": 0.0928, "grad_norm": 0.5896144959913211, "learning_rate": 4.616613418530352e-06, "loss": 0.2369994878768921, "step": 290 },
    { "epoch": 0.096, "grad_norm": 0.6202443536122002, "learning_rate": 4.776357827476039e-06, "loss": 0.23878774642944336, "step": 300 },
    { "epoch": 0.0992, "grad_norm": 0.654740818437731, "learning_rate": 4.936102236421725e-06, "loss": 0.22523627281188965, "step": 310 },
    { "epoch": 0.1024, "grad_norm": 0.5332231058888761, "learning_rate": 4.999943833158769e-06, "loss": 0.22634780406951904, "step": 320 },
    { "epoch": 0.1056, "grad_norm": 0.5353007164619794, "learning_rate": 4.999600600490783e-06, "loss": 0.23276047706604003, "step": 330 },
    { "epoch": 0.1088, "grad_norm": 0.53617134295571, "learning_rate": 4.9989453817439345e-06, "loss": 0.22672569751739502, "step": 340 },
    { "epoch": 0.112, "grad_norm": 0.5149149938648103, "learning_rate": 4.997978258698942e-06, "loss": 0.22631363868713378, "step": 350 },
    { "epoch": 0.1152, "grad_norm": 0.5959881018141326, "learning_rate": 4.996699352066659e-06, "loss": 0.22707018852233887, "step": 360 },
    { "epoch": 0.1184, "grad_norm": 0.6648028246958526, "learning_rate": 4.995108821473014e-06, "loss": 0.22777373790740968, "step": 370 },
    { "epoch": 0.1216, "grad_norm": 0.6395047869916185, "learning_rate": 4.993206865439084e-06, "loss": 0.22382116317749023, "step": 380 },
    { "epoch": 0.1248, "grad_norm": 0.6449783716947614, "learning_rate": 4.990993721356317e-06, "loss": 0.22268824577331542, "step": 390 },
    { "epoch": 0.128, "grad_norm": 0.6709421623745665, "learning_rate": 4.988469665456901e-06, "loss": 0.22317943572998047, "step": 400 },
    { "epoch": 0.1312, "grad_norm": 0.5466948727484514, "learning_rate": 4.985635012779288e-06, "loss": 0.23101482391357422, "step": 410 },
    { "epoch": 0.1344, "grad_norm": 0.48989327197226856, "learning_rate": 4.98249011712887e-06, "loss": 0.2234072208404541, "step": 420 },
    { "epoch": 0.1376, "grad_norm": 0.5417400145938138, "learning_rate": 4.979035371033824e-06, "loss": 0.22212049961090088, "step": 430 },
    { "epoch": 0.1408, "grad_norm": 0.5576422767413268, "learning_rate": 4.975271205696115e-06, "loss": 0.22228083610534669, "step": 440 },
    { "epoch": 0.144, "grad_norm": 0.6175584799790863, "learning_rate": 4.971198090937671e-06, "loss": 0.21532373428344725, "step": 450 },
    { "epoch": 0.1472, "grad_norm": 0.6360712146764758, "learning_rate": 4.966816535141756e-06, "loss": 0.21311187744140625, "step": 460 },
    { "epoch": 0.1504, "grad_norm": 0.5401953881204377, "learning_rate": 4.9621270851895035e-06, "loss": 0.22237277030944824, "step": 470 },
    { "epoch": 0.1536, "grad_norm": 0.5988873649948656, "learning_rate": 4.957130326391662e-06, "loss": 0.22391064167022706, "step": 480 },
    { "epoch": 0.1568, "grad_norm": 0.5132670412160366, "learning_rate": 4.951826882415544e-06, "loss": 0.2206397533416748, "step": 490 },
    { "epoch": 0.16, "grad_norm": 0.5935020011592513, "learning_rate": 4.946217415207177e-06, "loss": 0.2148068904876709, "step": 500 },
    { "epoch": 0.1632, "grad_norm": 0.5324390349507315, "learning_rate": 4.940302624908689e-06, "loss": 0.21909193992614745, "step": 510 },
    { "epoch": 0.1664, "grad_norm": 0.6082929578051663, "learning_rate": 4.934083249770912e-06, "loss": 0.2133782386779785, "step": 520 },
    { "epoch": 0.1696, "grad_norm": 0.6272295187969801, "learning_rate": 4.927560066061251e-06, "loss": 0.2180723190307617, "step": 530 },
    { "epoch": 0.1728, "grad_norm": 0.5538741111929965, "learning_rate": 4.920733887966783e-06, "loss": 0.22759020328521729, "step": 540 },
    { "epoch": 0.176, "grad_norm": 0.5703593568416581, "learning_rate": 4.913605567492636e-06, "loss": 0.21657073497772217, "step": 550 },
    { "epoch": 0.1792, "grad_norm": 0.5873043850881617, "learning_rate": 4.906175994355656e-06, "loss": 0.21824207305908203, "step": 560 },
    { "epoch": 0.1824, "grad_norm": 0.7955355117519857, "learning_rate": 4.898446095873345e-06, "loss": 0.2209712028503418, "step": 570 },
    { "epoch": 0.1856, "grad_norm": 0.5347403539894492, "learning_rate": 4.890416836848128e-06, "loss": 0.2184591293334961, "step": 580 },
    { "epoch": 0.1888, "grad_norm": 0.5464598874722423, "learning_rate": 4.882089219446925e-06, "loss": 0.2130581855773926, "step": 590 },
    { "epoch": 0.192, "grad_norm": 0.5871382794412585, "learning_rate": 4.873464283076074e-06, "loss": 0.21770844459533692, "step": 600 },
    { "epoch": 0.1952, "grad_norm": 0.5516595084585112, "learning_rate": 4.864543104251587e-06, "loss": 0.21629047393798828, "step": 610 },
    { "epoch": 0.1984, "grad_norm": 0.5949100146178041, "learning_rate": 4.855326796464798e-06, "loss": 0.22033746242523194, "step": 620 },
    { "epoch": 0.2016, "grad_norm": 0.5798876425998256, "learning_rate": 4.8458165100433725e-06, "loss": 0.21477458477020264, "step": 630 },
    { "epoch": 0.2048, "grad_norm": 0.563545251458103, "learning_rate": 4.836013432007738e-06, "loss": 0.21490144729614258, "step": 640 },
    { "epoch": 0.208, "grad_norm": 0.5256728978801903, "learning_rate": 4.825918785922921e-06, "loss": 0.21858677864074708, "step": 650 },
    { "epoch": 0.2112, "grad_norm": 0.5062609806869888, "learning_rate": 4.8155338317458315e-06, "loss": 0.21592459678649903, "step": 660 },
    { "epoch": 0.2144, "grad_norm": 0.555318042395406, "learning_rate": 4.804859865668002e-06, "loss": 0.21323423385620116, "step": 670 },
    { "epoch": 0.2176, "grad_norm": 0.6382467151310525, "learning_rate": 4.793898219953804e-06, "loss": 0.2151188373565674, "step": 680 },
    { "epoch": 0.2208, "grad_norm": 0.5426280956852546, "learning_rate": 4.782650262774164e-06, "loss": 0.2155141830444336, "step": 690 },
    { "epoch": 0.224, "grad_norm": 0.5602841392771764, "learning_rate": 4.7711173980357886e-06, "loss": 0.21242978572845458, "step": 700 },
    { "epoch": 0.2272, "grad_norm": 0.5837827171492797, "learning_rate": 4.759301065205947e-06, "loss": 0.2129213333129883, "step": 710 },
    { "epoch": 0.2304, "grad_norm": 0.5678516858648391, "learning_rate": 4.7472027391328e-06, "loss": 0.21422340869903564, "step": 720 },
    { "epoch": 0.2336, "grad_norm": 0.6213695156464779, "learning_rate": 4.734823929861317e-06, "loss": 0.2172607660293579, "step": 730 },
    { "epoch": 0.2368, "grad_norm": 0.6084105321286742, "learning_rate": 4.722166182444801e-06, "loss": 0.21331138610839845, "step": 740 },
    { "epoch": 0.24, "grad_norm": 0.5848312022835148, "learning_rate": 4.709231076752045e-06, "loss": 0.21255254745483398, "step": 750 },
    { "epoch": 0.2432, "grad_norm": 0.5855428740644943, "learning_rate": 4.696020227270142e-06, "loss": 0.21734881401062012, "step": 760 },
    { "epoch": 0.2464, "grad_norm": 0.5135013968609298, "learning_rate": 4.6825352829029705e-06, "loss": 0.21321442127227783, "step": 770 },
    { "epoch": 0.2496, "grad_norm": 0.5938685951597557, "learning_rate": 4.668777926765392e-06, "loss": 0.21113758087158202, "step": 780 },
    { "epoch": 0.2528, "grad_norm": 0.6490004462160337, "learning_rate": 4.6547498759731725e-06, "loss": 0.20692987442016603, "step": 790 },
    { "epoch": 0.256, "grad_norm": 0.5694207965471786, "learning_rate": 4.6404528814286575e-06, "loss": 0.20959222316741943, "step": 800 },
    { "epoch": 0.2592, "grad_norm": 0.5648942925010132, "learning_rate": 4.6258887276022425e-06, "loss": 0.21758944988250734, "step": 810 },
    { "epoch": 0.2624, "grad_norm": 0.6544068998265237, "learning_rate": 4.611059232309639e-06, "loss": 0.21146907806396484, "step": 820 },
    { "epoch": 0.2656, "grad_norm": 0.6680185905090128, "learning_rate": 4.595966246484986e-06, "loss": 0.21348462104797364, "step": 830 },
    { "epoch": 0.2688, "grad_norm": 0.4956164506371995, "learning_rate": 4.580611653949829e-06, "loss": 0.21317172050476074, "step": 840 },
    { "epoch": 0.272, "grad_norm": 0.6491508776235345, "learning_rate": 4.564997371177992e-06, "loss": 0.2108323574066162, "step": 850 },
    { "epoch": 0.2752, "grad_norm": 0.6859739128419746, "learning_rate": 4.54912534705637e-06, "loss": 0.21068863868713378, "step": 860 },
    { "epoch": 0.2784, "grad_norm": 0.5876140035889241, "learning_rate": 4.532997562641683e-06, "loss": 0.20738301277160645, "step": 870 },
    { "epoch": 0.2816, "grad_norm": 0.5388630641864397, "learning_rate": 4.516616030913214e-06, "loss": 0.2113194465637207, "step": 880 },
    { "epoch": 0.2848, "grad_norm": 0.527263546069221, "learning_rate": 4.499982796521556e-06, "loss": 0.20718231201171874, "step": 890 },
    { "epoch": 0.288, "grad_norm": 0.6778383199902553, "learning_rate": 4.48309993553341e-06, "loss": 0.20899975299835205, "step": 900 },
    { "epoch": 0.2912, "grad_norm": 0.6041502046582736, "learning_rate": 4.465969555172468e-06, "loss": 0.20922982692718506, "step": 910 },
    { "epoch": 0.2944, "grad_norm": 0.5872507915529911, "learning_rate": 4.448593793556391e-06, "loss": 0.21518073081970215, "step": 920 },
    { "epoch": 0.2976, "grad_norm": 0.5414243473578003, "learning_rate": 4.430974819429954e-06, "loss": 0.20869126319885253, "step": 930 },
    { "epoch": 0.3008, "grad_norm": 0.4624854855413159, "learning_rate": 4.413114831894344e-06, "loss": 0.20277881622314453, "step": 940 },
    { "epoch": 0.304, "grad_norm": 0.5247854876993729, "learning_rate": 4.3950160601326865e-06, "loss": 0.20181698799133302, "step": 950 },
    { "epoch": 0.3072, "grad_norm": 0.5808078368512252, "learning_rate": 4.376680763131811e-06, "loss": 0.20898809432983398, "step": 960 },
    { "epoch": 0.3104, "grad_norm": 0.5805212694083882, "learning_rate": 4.358111229400296e-06, "loss": 0.21212198734283447, "step": 970 },
    { "epoch": 0.3136, "grad_norm": 0.5721764020420262, "learning_rate": 4.33930977668283e-06, "loss": 0.21448736190795897, "step": 980 },
    { "epoch": 0.3168, "grad_norm": 0.5598008397585128, "learning_rate": 4.320278751670922e-06, "loss": 0.20758256912231446, "step": 990 },
    { "epoch": 0.32, "grad_norm": 0.5522723710696453, "learning_rate": 4.301020529710009e-06, "loss": 0.20947573184967042, "step": 1000 },
    { "epoch": 0.3232, "grad_norm": 0.5556932215476815, "learning_rate": 4.281537514502962e-06, "loss": 0.2131945848464966, "step": 1010 },
    { "epoch": 0.3264, "grad_norm": 0.5256326530235461, "learning_rate": 4.261832137810093e-06, "loss": 0.20962438583374024, "step": 1020 },
    { "epoch": 0.3296, "grad_norm": 0.5141067804644184, "learning_rate": 4.241906859145611e-06, "loss": 0.21035046577453614, "step": 1030 },
    { "epoch": 0.3328, "grad_norm": 0.509376911595103, "learning_rate": 4.221764165470661e-06, "loss": 0.20757730007171632, "step": 1040 },
    { "epoch": 0.336, "grad_norm": 0.5632632187185198, "learning_rate": 4.201406570882898e-06, "loss": 0.20691304206848143, "step": 1050 },
    { "epoch": 0.3392, "grad_norm": 0.5786515758035645, "learning_rate": 4.180836616302704e-06, "loss": 0.20582923889160157, "step": 1060 },
    { "epoch": 0.3424, "grad_norm": 0.591108109764431, "learning_rate": 4.160056869156041e-06, "loss": 0.2102893590927124, "step": 1070 },
    { "epoch": 0.3456, "grad_norm": 0.5367428274966828, "learning_rate": 4.139069923053995e-06, "loss": 0.20834057331085204, "step": 1080 },
    { "epoch": 0.3488, "grad_norm": 0.49962583382458753, "learning_rate": 4.117878397469062e-06, "loss": 0.2114588975906372, "step": 1090 },
    { "epoch": 0.352, "grad_norm": 0.5580828852277292, "learning_rate": 4.096484937408195e-06, "loss": 0.2029412269592285, "step": 1100 },
    { "epoch": 0.3552, "grad_norm": 0.5671943339841842, "learning_rate": 4.074892213082676e-06, "loss": 0.20308828353881836, "step": 1110 },
    { "epoch": 0.3584, "grad_norm": 0.5583868175031171, "learning_rate": 4.0531029195748265e-06, "loss": 0.2104210376739502, "step": 1120 },
    {
      "epoch": 0.3616,
      "grad_norm": 0.5452939479895703,
|
| 799 |
+
"learning_rate": 4.03111977650163e-06,
|
| 800 |
+
"loss": 0.20968456268310548,
|
| 801 |
+
"step": 1130
|
| 802 |
+
},
|
| 803 |
+
{
|
| 804 |
+
"epoch": 0.3648,
|
| 805 |
+
"grad_norm": 0.6195183591357212,
|
| 806 |
+
"learning_rate": 4.008945527675281e-06,
|
| 807 |
+
"loss": 0.20957679748535157,
|
| 808 |
+
"step": 1140
|
| 809 |
+
},
|
| 810 |
+
{
|
| 811 |
+
"epoch": 0.368,
|
| 812 |
+
"grad_norm": 0.6171258889408775,
|
| 813 |
+
"learning_rate": 3.986582940760717e-06,
|
| 814 |
+
"loss": 0.1984492540359497,
|
| 815 |
+
"step": 1150
|
| 816 |
+
},
|
| 817 |
+
{
|
| 818 |
+
"epoch": 0.3712,
|
| 819 |
+
"grad_norm": 0.6164010362674036,
|
| 820 |
+
"learning_rate": 3.9640348069301785e-06,
|
| 821 |
+
"loss": 0.20632429122924806,
|
| 822 |
+
"step": 1160
|
| 823 |
+
},
|
| 824 |
+
{
|
| 825 |
+
"epoch": 0.3744,
|
| 826 |
+
"grad_norm": 0.5558070727772452,
|
| 827 |
+
"learning_rate": 3.941303940514826e-06,
|
| 828 |
+
"loss": 0.20776019096374512,
|
| 829 |
+
"step": 1170
|
| 830 |
+
},
|
| 831 |
+
{
|
| 832 |
+
"epoch": 0.3776,
|
| 833 |
+
"grad_norm": 0.5943916453083408,
|
| 834 |
+
"learning_rate": 3.918393178653472e-06,
|
| 835 |
+
"loss": 0.20839078426361085,
|
| 836 |
+
"step": 1180
|
| 837 |
+
},
|
| 838 |
+
{
|
| 839 |
+
"epoch": 0.3808,
|
| 840 |
+
"grad_norm": 0.5018385923371635,
|
| 841 |
+
"learning_rate": 3.895305380938468e-06,
|
| 842 |
+
"loss": 0.2044908285140991,
|
| 843 |
+
"step": 1190
|
| 844 |
+
},
|
| 845 |
+
{
|
| 846 |
+
"epoch": 0.384,
|
| 847 |
+
"grad_norm": 0.48660847876218716,
|
| 848 |
+
"learning_rate": 3.872043429058783e-06,
|
| 849 |
+
"loss": 0.20328717231750487,
|
| 850 |
+
"step": 1200
|
| 851 |
+
},
|
| 852 |
+
{
|
| 853 |
+
"epoch": 0.3872,
|
| 854 |
+
"grad_norm": 0.5586353975354608,
|
| 855 |
+
"learning_rate": 3.84861022644033e-06,
|
| 856 |
+
"loss": 0.20572426319122314,
|
| 857 |
+
"step": 1210
|
| 858 |
+
},
|
| 859 |
+
{
|
| 860 |
+
"epoch": 0.3904,
|
| 861 |
+
"grad_norm": 0.5709168788921625,
|
| 862 |
+
"learning_rate": 3.825008697883574e-06,
|
| 863 |
+
"loss": 0.21369614601135253,
|
| 864 |
+
"step": 1220
|
| 865 |
+
},
|
| 866 |
+
{
|
| 867 |
+
"epoch": 0.3936,
|
| 868 |
+
"grad_norm": 0.5589246090839964,
|
| 869 |
+
"learning_rate": 3.8012417891984776e-06,
|
| 870 |
+
"loss": 0.2072831630706787,
|
| 871 |
+
"step": 1230
|
| 872 |
+
},
|
| 873 |
+
{
|
| 874 |
+
"epoch": 0.3968,
|
| 875 |
+
"grad_norm": 0.5711782327133378,
|
| 876 |
+
"learning_rate": 3.777312466836819e-06,
|
| 877 |
+
"loss": 0.20526669025421143,
|
| 878 |
+
"step": 1240
|
| 879 |
+
},
|
| 880 |
+
{
|
| 881 |
+
"epoch": 0.4,
|
| 882 |
+
"grad_norm": 0.5656399244912672,
|
| 883 |
+
"learning_rate": 3.7532237175219378e-06,
|
| 884 |
+
"loss": 0.20442888736724854,
|
| 885 |
+
"step": 1250
|
| 886 |
+
},
|
| 887 |
+
{
|
| 888 |
+
"epoch": 0.4032,
|
| 889 |
+
"grad_norm": 0.5520901024337347,
|
| 890 |
+
"learning_rate": 3.728978547875948e-06,
|
| 891 |
+
"loss": 0.2092284679412842,
|
| 892 |
+
"step": 1260
|
| 893 |
+
},
|
| 894 |
+
{
|
| 895 |
+
"epoch": 0.4064,
|
| 896 |
+
"grad_norm": 0.553756025103199,
|
| 897 |
+
"learning_rate": 3.7045799840444712e-06,
|
| 898 |
+
"loss": 0.20277605056762696,
|
| 899 |
+
"step": 1270
|
| 900 |
+
},
|
| 901 |
+
{
|
| 902 |
+
"epoch": 0.4096,
|
| 903 |
+
"grad_norm": 0.5430187148138641,
|
| 904 |
+
"learning_rate": 3.6800310713189258e-06,
|
| 905 |
+
"loss": 0.20491743087768555,
|
| 906 |
+
"step": 1280
|
| 907 |
+
},
|
| 908 |
+
{
|
| 909 |
+
"epoch": 0.4128,
|
| 910 |
+
"grad_norm": 0.7620941398223869,
|
| 911 |
+
"learning_rate": 3.6553348737564328e-06,
|
| 912 |
+
"loss": 0.2055516481399536,
|
| 913 |
+
"step": 1290
|
| 914 |
+
},
|
| 915 |
+
{
|
| 916 |
+
"epoch": 0.416,
|
| 917 |
+
"grad_norm": 0.5265612798122297,
|
| 918 |
+
"learning_rate": 3.6304944737973794e-06,
|
| 919 |
+
"loss": 0.21130599975585937,
|
| 920 |
+
"step": 1300
|
| 921 |
+
},
|
| 922 |
+
{
|
| 923 |
+
"epoch": 0.4192,
|
| 924 |
+
"grad_norm": 0.5353794185025008,
|
| 925 |
+
"learning_rate": 3.6055129718806836e-06,
|
| 926 |
+
"loss": 0.20504627227783204,
|
| 927 |
+
"step": 1310
|
| 928 |
+
},
|
| 929 |
+
{
|
| 930 |
+
"epoch": 0.4224,
|
| 931 |
+
"grad_norm": 0.5979654766960453,
|
| 932 |
+
"learning_rate": 3.5803934860568134e-06,
|
| 933 |
+
"loss": 0.2000981330871582,
|
| 934 |
+
"step": 1320
|
| 935 |
+
},
|
| 936 |
+
{
|
| 937 |
+
"epoch": 0.4256,
|
| 938 |
+
"grad_norm": 0.5915664314356317,
|
| 939 |
+
"learning_rate": 3.5551391515986163e-06,
|
| 940 |
+
"loss": 0.20581989288330077,
|
| 941 |
+
"step": 1330
|
| 942 |
+
},
|
| 943 |
+
{
|
| 944 |
+
"epoch": 0.4288,
|
| 945 |
+
"grad_norm": 0.562992516341074,
|
| 946 |
+
"learning_rate": 3.529753120609982e-06,
|
| 947 |
+
"loss": 0.20160207748413086,
|
| 948 |
+
"step": 1340
|
| 949 |
+
},
|
| 950 |
+
{
|
| 951 |
+
"epoch": 0.432,
|
| 952 |
+
"grad_norm": 0.7046032478558245,
|
| 953 |
+
"learning_rate": 3.5042385616324243e-06,
|
| 954 |
+
"loss": 0.2043483018875122,
|
| 955 |
+
"step": 1350
|
| 956 |
+
},
|
| 957 |
+
{
|
| 958 |
+
"epoch": 0.4352,
|
| 959 |
+
"grad_norm": 0.5184492477363449,
|
| 960 |
+
"learning_rate": 3.4785986592495934e-06,
|
| 961 |
+
"loss": 0.20285494327545167,
|
| 962 |
+
"step": 1360
|
| 963 |
+
},
|
| 964 |
+
{
|
| 965 |
+
"epoch": 0.4384,
|
| 966 |
+
"grad_norm": 0.5806380074338086,
|
| 967 |
+
"learning_rate": 3.452836613689803e-06,
|
| 968 |
+
"loss": 0.2009434223175049,
|
| 969 |
+
"step": 1370
|
| 970 |
+
},
|
| 971 |
+
{
|
| 972 |
+
"epoch": 0.4416,
|
| 973 |
+
"grad_norm": 0.5204618736945451,
|
| 974 |
+
"learning_rate": 3.426955640426584e-06,
|
| 975 |
+
"loss": 0.20416510105133057,
|
| 976 |
+
"step": 1380
|
| 977 |
+
},
|
| 978 |
+
{
|
| 979 |
+
"epoch": 0.4448,
|
| 980 |
+
"grad_norm": 0.5765864502605341,
|
| 981 |
+
"learning_rate": 3.4009589697773605e-06,
|
| 982 |
+
"loss": 0.20326631069183348,
|
| 983 |
+
"step": 1390
|
| 984 |
+
},
|
| 985 |
+
{
|
| 986 |
+
"epoch": 0.448,
|
| 987 |
+
"grad_norm": 0.5779970501460372,
|
| 988 |
+
"learning_rate": 3.3748498465002475e-06,
|
| 989 |
+
"loss": 0.20073289871215821,
|
| 990 |
+
"step": 1400
|
| 991 |
+
},
|
| 992 |
+
{
|
| 993 |
+
"epoch": 0.4512,
|
| 994 |
+
"grad_norm": 0.6393995362823897,
|
| 995 |
+
"learning_rate": 3.3486315293890693e-06,
|
| 996 |
+
"loss": 0.20874643325805664,
|
| 997 |
+
"step": 1410
|
| 998 |
+
},
|
| 999 |
+
{
|
| 1000 |
+
"epoch": 0.4544,
|
| 1001 |
+
"grad_norm": 0.5108762095593324,
|
| 1002 |
+
"learning_rate": 3.3223072908666053e-06,
|
| 1003 |
+
"loss": 0.19835340976715088,
|
| 1004 |
+
"step": 1420
|
| 1005 |
+
},
|
| 1006 |
+
{
|
| 1007 |
+
"epoch": 0.4576,
|
| 1008 |
+
"grad_norm": 0.6435280387445825,
|
| 1009 |
+
"learning_rate": 3.295880416576153e-06,
|
| 1010 |
+
"loss": 0.20992684364318848,
|
| 1011 |
+
"step": 1430
|
| 1012 |
+
},
|
| 1013 |
+
{
|
| 1014 |
+
"epoch": 0.4608,
|
| 1015 |
+
"grad_norm": 0.5838753206875198,
|
| 1016 |
+
"learning_rate": 3.269354204971427e-06,
|
| 1017 |
+
"loss": 0.20265870094299315,
|
| 1018 |
+
"step": 1440
|
| 1019 |
+
},
|
| 1020 |
+
{
|
| 1021 |
+
"epoch": 0.464,
|
| 1022 |
+
"grad_norm": 0.6745984788898958,
|
| 1023 |
+
"learning_rate": 3.242731966904865e-06,
|
| 1024 |
+
"loss": 0.20037527084350587,
|
| 1025 |
+
"step": 1450
|
| 1026 |
+
},
|
| 1027 |
+
{
|
| 1028 |
+
"epoch": 0.4672,
|
| 1029 |
+
"grad_norm": 0.5358161645108944,
|
| 1030 |
+
"learning_rate": 3.2160170252143913e-06,
|
| 1031 |
+
"loss": 0.20123369693756105,
|
| 1032 |
+
"step": 1460
|
| 1033 |
+
},
|
| 1034 |
+
{
|
| 1035 |
+
"epoch": 0.4704,
|
| 1036 |
+
"grad_norm": 0.5112361606823973,
|
| 1037 |
+
"learning_rate": 3.1892127143086716e-06,
|
| 1038 |
+
"loss": 0.20752406120300293,
|
| 1039 |
+
"step": 1470
|
| 1040 |
+
},
|
| 1041 |
+
{
|
| 1042 |
+
"epoch": 0.4736,
|
| 1043 |
+
"grad_norm": 0.6333759965752455,
|
| 1044 |
+
"learning_rate": 3.1623223797509347e-06,
|
| 1045 |
+
"loss": 0.19706425666809083,
|
| 1046 |
+
"step": 1480
|
| 1047 |
+
},
|
| 1048 |
+
{
|
| 1049 |
+
"epoch": 0.4768,
|
| 1050 |
+
"grad_norm": 0.6206117536462172,
|
| 1051 |
+
"learning_rate": 3.135349377841396e-06,
|
| 1052 |
+
"loss": 0.20125732421875,
|
| 1053 |
+
"step": 1490
|
| 1054 |
+
},
|
| 1055 |
+
{
|
| 1056 |
+
"epoch": 0.48,
|
| 1057 |
+
"grad_norm": 0.5541712474486513,
|
| 1058 |
+
"learning_rate": 3.1082970751983497e-06,
|
| 1059 |
+
"loss": 0.20749812126159667,
|
| 1060 |
+
"step": 1500
|
| 1061 |
+
},
|
| 1062 |
+
{
|
| 1063 |
+
"epoch": 0.4832,
|
| 1064 |
+
"grad_norm": 0.5835934183180771,
|
| 1065 |
+
"learning_rate": 3.0811688483379546e-06,
|
| 1066 |
+
"loss": 0.20475554466247559,
|
| 1067 |
+
"step": 1510
|
| 1068 |
+
},
|
| 1069 |
+
{
|
| 1070 |
+
"epoch": 0.4864,
|
| 1071 |
+
"grad_norm": 0.5792514427898341,
|
| 1072 |
+
"learning_rate": 3.0539680832528074e-06,
|
| 1073 |
+
"loss": 0.20504088401794435,
|
| 1074 |
+
"step": 1520
|
| 1075 |
+
},
|
| 1076 |
+
{
|
| 1077 |
+
"epoch": 0.4896,
|
| 1078 |
+
"grad_norm": 0.6358843481166787,
|
| 1079 |
+
"learning_rate": 3.026698174989316e-06,
|
| 1080 |
+
"loss": 0.20325000286102296,
|
| 1081 |
+
"step": 1530
|
| 1082 |
+
},
|
| 1083 |
+
{
|
| 1084 |
+
"epoch": 0.4928,
|
| 1085 |
+
"grad_norm": 0.5059500889981753,
|
| 1086 |
+
"learning_rate": 2.999362527223952e-06,
|
| 1087 |
+
"loss": 0.2031909465789795,
|
| 1088 |
+
"step": 1540
|
| 1089 |
+
},
|
| 1090 |
+
{
|
| 1091 |
+
"epoch": 0.496,
|
| 1092 |
+
"grad_norm": 0.5388306821924389,
|
| 1093 |
+
"learning_rate": 2.9719645518384194e-06,
|
| 1094 |
+
"loss": 0.20504312515258788,
|
| 1095 |
+
"step": 1550
|
| 1096 |
+
},
|
| 1097 |
+
{
|
| 1098 |
+
"epoch": 0.4992,
|
| 1099 |
+
"grad_norm": 0.5939936480408617,
|
| 1100 |
+
"learning_rate": 2.944507668493807e-06,
|
| 1101 |
+
"loss": 0.2084404706954956,
|
| 1102 |
+
"step": 1560
|
| 1103 |
+
},
|
| 1104 |
+
{
|
| 1105 |
+
"epoch": 0.5024,
|
| 1106 |
+
"grad_norm": 0.5687025114161597,
|
| 1107 |
+
"learning_rate": 2.9169953042037623e-06,
|
| 1108 |
+
"loss": 0.20367155075073243,
|
| 1109 |
+
"step": 1570
|
| 1110 |
+
},
|
| 1111 |
+
{
|
| 1112 |
+
"epoch": 0.5056,
|
| 1113 |
+
"grad_norm": 0.5703613797457775,
|
| 1114 |
+
"learning_rate": 2.889430892906754e-06,
|
| 1115 |
+
"loss": 0.19950419664382935,
|
| 1116 |
+
"step": 1580
|
| 1117 |
+
},
|
| 1118 |
+
{
|
| 1119 |
+
"epoch": 0.5088,
|
| 1120 |
+
"grad_norm": 0.50147360976836,
|
| 1121 |
+
"learning_rate": 2.861817875037462e-06,
|
| 1122 |
+
"loss": 0.19737675189971923,
|
| 1123 |
+
"step": 1590
|
| 1124 |
+
},
|
| 1125 |
+
{
|
| 1126 |
+
"epoch": 0.512,
|
| 1127 |
+
"grad_norm": 0.5962810686359508,
|
| 1128 |
+
"learning_rate": 2.8341596970973683e-06,
|
| 1129 |
+
"loss": 0.206866455078125,
|
| 1130 |
+
"step": 1600
|
| 1131 |
+
},
|
| 1132 |
+
{
|
| 1133 |
+
"epoch": 0.5152,
|
| 1134 |
+
"grad_norm": 0.564566320219468,
|
| 1135 |
+
"learning_rate": 2.80645981122458e-06,
|
| 1136 |
+
"loss": 0.2020205020904541,
|
| 1137 |
+
"step": 1610
|
| 1138 |
+
},
|
| 1139 |
+
{
|
| 1140 |
+
"epoch": 0.5184,
|
| 1141 |
+
"grad_norm": 0.5246372929237232,
|
| 1142 |
+
"learning_rate": 2.7787216747629508e-06,
|
| 1143 |
+
"loss": 0.20939722061157226,
|
| 1144 |
+
"step": 1620
|
| 1145 |
+
},
|
| 1146 |
+
{
|
| 1147 |
+
"epoch": 0.5216,
|
| 1148 |
+
"grad_norm": 0.5415181940486332,
|
| 1149 |
+
"learning_rate": 2.7509487498305615e-06,
|
| 1150 |
+
"loss": 0.19629446268081666,
|
| 1151 |
+
"step": 1630
|
| 1152 |
+
},
|
| 1153 |
+
{
|
| 1154 |
+
"epoch": 0.5248,
|
| 1155 |
+
"grad_norm": 0.5627430222118958,
|
| 1156 |
+
"learning_rate": 2.7231445028875924e-06,
|
| 1157 |
+
"loss": 0.20240178108215331,
|
| 1158 |
+
"step": 1640
|
| 1159 |
+
},
|
| 1160 |
+
{
|
| 1161 |
+
"epoch": 0.528,
|
| 1162 |
+
"grad_norm": 0.5578941065241574,
|
| 1163 |
+
"learning_rate": 2.6953124043036604e-06,
|
| 1164 |
+
"loss": 0.2012562036514282,
|
| 1165 |
+
"step": 1650
|
| 1166 |
+
},
|
| 1167 |
+
{
|
| 1168 |
+
"epoch": 0.5312,
|
| 1169 |
+
"grad_norm": 0.5487117054063715,
|
| 1170 |
+
"learning_rate": 2.667455927924667e-06,
|
| 1171 |
+
"loss": 0.20127537250518798,
|
| 1172 |
+
"step": 1660
|
| 1173 |
+
},
|
| 1174 |
+
{
|
| 1175 |
+
"epoch": 0.5344,
|
| 1176 |
+
"grad_norm": 0.571360126804376,
|
| 1177 |
+
"learning_rate": 2.6395785506392164e-06,
|
| 1178 |
+
"loss": 0.1964709758758545,
|
| 1179 |
+
"step": 1670
|
| 1180 |
+
},
|
| 1181 |
+
{
|
| 1182 |
+
"epoch": 0.5376,
|
| 1183 |
+
"grad_norm": 0.6088527341362128,
|
| 1184 |
+
"learning_rate": 2.6116837519446407e-06,
|
| 1185 |
+
"loss": 0.1997244954109192,
|
| 1186 |
+
"step": 1680
|
| 1187 |
+
},
|
| 1188 |
+
{
|
| 1189 |
+
"epoch": 0.5408,
|
| 1190 |
+
"grad_norm": 0.5974545138027041,
|
| 1191 |
+
"learning_rate": 2.5837750135127192e-06,
|
| 1192 |
+
"loss": 0.19768773317337035,
|
| 1193 |
+
"step": 1690
|
| 1194 |
+
},
|
| 1195 |
+
{
|
| 1196 |
+
"epoch": 0.544,
|
| 1197 |
+
"grad_norm": 0.5496714163583045,
|
| 1198 |
+
"learning_rate": 2.555855818755108e-06,
|
| 1199 |
+
"loss": 0.20294923782348634,
|
| 1200 |
+
"step": 1700
|
| 1201 |
+
},
|
| 1202 |
+
{
|
| 1203 |
+
"epoch": 0.5472,
|
| 1204 |
+
"grad_norm": 0.7083231030411815,
|
| 1205 |
+
"learning_rate": 2.5279296523885636e-06,
|
| 1206 |
+
"loss": 0.20083847045898437,
|
| 1207 |
+
"step": 1710
|
| 1208 |
+
},
|
| 1209 |
+
{
|
| 1210 |
+
"epoch": 0.5504,
|
| 1211 |
+
"grad_norm": 0.5938882026412365,
|
| 1212 |
+
"learning_rate": 2.5e-06,
|
| 1213 |
+
"loss": 0.20156488418579102,
|
| 1214 |
+
"step": 1720
|
| 1215 |
+
},
|
| 1216 |
+
{
|
| 1217 |
+
"epoch": 0.5536,
|
| 1218 |
+
"grad_norm": 0.5963429209905415,
|
| 1219 |
+
"learning_rate": 2.472070347611437e-06,
|
| 1220 |
+
"loss": 0.19514652490615844,
|
| 1221 |
+
"step": 1730
|
| 1222 |
+
},
|
| 1223 |
+
{
|
| 1224 |
+
"epoch": 0.5568,
|
| 1225 |
+
"grad_norm": 0.6395947365412442,
|
| 1226 |
+
"learning_rate": 2.444144181244893e-06,
|
| 1227 |
+
"loss": 0.20121583938598633,
|
| 1228 |
+
"step": 1740
|
| 1229 |
+
},
|
| 1230 |
+
{
|
| 1231 |
+
"epoch": 0.56,
|
| 1232 |
+
"grad_norm": 0.5998001248295249,
|
| 1233 |
+
"learning_rate": 2.416224986487282e-06,
|
| 1234 |
+
"loss": 0.19726226329803467,
|
| 1235 |
+
"step": 1750
|
| 1236 |
+
},
|
| 1237 |
+
{
|
| 1238 |
+
"epoch": 0.5632,
|
| 1239 |
+
"grad_norm": 0.5593754591530539,
|
| 1240 |
+
"learning_rate": 2.3883162480553605e-06,
|
| 1241 |
+
"loss": 0.19497768878936766,
|
| 1242 |
+
"step": 1760
|
| 1243 |
+
},
|
| 1244 |
+
{
|
| 1245 |
+
"epoch": 0.5664,
|
| 1246 |
+
"grad_norm": 0.5860785466160793,
|
| 1247 |
+
"learning_rate": 2.3604214493607844e-06,
|
| 1248 |
+
"loss": 0.1996150493621826,
|
| 1249 |
+
"step": 1770
|
| 1250 |
+
},
|
| 1251 |
+
{
|
| 1252 |
+
"epoch": 0.5696,
|
| 1253 |
+
"grad_norm": 0.5963601131944923,
|
| 1254 |
+
"learning_rate": 2.332544072075333e-06,
|
| 1255 |
+
"loss": 0.20348951816558838,
|
| 1256 |
+
"step": 1780
|
| 1257 |
+
},
|
| 1258 |
+
{
|
| 1259 |
+
"epoch": 0.5728,
|
| 1260 |
+
"grad_norm": 0.5745583695919886,
|
| 1261 |
+
"learning_rate": 2.30468759569634e-06,
|
| 1262 |
+
"loss": 0.2016512393951416,
|
| 1263 |
+
"step": 1790
|
| 1264 |
+
},
|
| 1265 |
+
{
|
| 1266 |
+
"epoch": 0.576,
|
| 1267 |
+
"grad_norm": 0.5720738010975994,
|
| 1268 |
+
"learning_rate": 2.276855497112408e-06,
|
| 1269 |
+
"loss": 0.1983588457107544,
|
| 1270 |
+
"step": 1800
|
| 1271 |
+
},
|
| 1272 |
+
{
|
| 1273 |
+
"epoch": 0.5792,
|
| 1274 |
+
"grad_norm": 0.594436652050367,
|
| 1275 |
+
"learning_rate": 2.2490512501694394e-06,
|
| 1276 |
+
"loss": 0.19393882751464844,
|
| 1277 |
+
"step": 1810
|
| 1278 |
+
},
|
| 1279 |
+
{
|
| 1280 |
+
"epoch": 0.5824,
|
| 1281 |
+
"grad_norm": 0.5547702774883363,
|
| 1282 |
+
"learning_rate": 2.2212783252370496e-06,
|
| 1283 |
+
"loss": 0.19950855970382692,
|
| 1284 |
+
"step": 1820
|
| 1285 |
+
},
|
| 1286 |
+
{
|
| 1287 |
+
"epoch": 0.5856,
|
| 1288 |
+
"grad_norm": 0.49741997333090354,
|
| 1289 |
+
"learning_rate": 2.1935401887754213e-06,
|
| 1290 |
+
"loss": 0.20486598014831542,
|
| 1291 |
+
"step": 1830
|
| 1292 |
+
},
|
| 1293 |
+
{
|
| 1294 |
+
"epoch": 0.5888,
|
| 1295 |
+
"grad_norm": 0.6191188389453962,
|
| 1296 |
+
"learning_rate": 2.165840302902632e-06,
|
| 1297 |
+
"loss": 0.1979525566101074,
|
| 1298 |
+
"step": 1840
|
| 1299 |
+
},
|
| 1300 |
+
{
|
| 1301 |
+
"epoch": 0.592,
|
| 1302 |
+
"grad_norm": 0.613998551941137,
|
| 1303 |
+
"learning_rate": 2.1381821249625383e-06,
|
| 1304 |
+
"loss": 0.2030627727508545,
|
| 1305 |
+
"step": 1850
|
| 1306 |
+
},
|
| 1307 |
+
{
|
| 1308 |
+
"epoch": 0.5952,
|
| 1309 |
+
"grad_norm": 0.6115410126221079,
|
| 1310 |
+
"learning_rate": 2.1105691070932465e-06,
|
| 1311 |
+
"loss": 0.1951197624206543,
|
| 1312 |
+
"step": 1860
|
| 1313 |
+
},
|
| 1314 |
+
{
|
| 1315 |
+
"epoch": 0.5984,
|
| 1316 |
+
"grad_norm": 0.5666967026000811,
|
| 1317 |
+
"learning_rate": 2.083004695796238e-06,
|
| 1318 |
+
"loss": 0.1926891803741455,
|
| 1319 |
+
"step": 1870
|
| 1320 |
+
},
|
| 1321 |
+
{
|
| 1322 |
+
"epoch": 0.6016,
|
| 1323 |
+
"grad_norm": 0.5564168831256036,
|
| 1324 |
+
"learning_rate": 2.055492331506194e-06,
|
| 1325 |
+
"loss": 0.20087857246398927,
|
| 1326 |
+
"step": 1880
|
| 1327 |
+
},
|
| 1328 |
+
{
|
| 1329 |
+
"epoch": 0.6048,
|
| 1330 |
+
"grad_norm": 0.647003695530594,
|
| 1331 |
+
"learning_rate": 2.0280354481615814e-06,
|
| 1332 |
+
"loss": 0.1991624116897583,
|
| 1333 |
+
"step": 1890
|
| 1334 |
+
},
|
| 1335 |
+
{
|
| 1336 |
+
"epoch": 0.608,
|
| 1337 |
+
"grad_norm": 0.6020348842840653,
|
| 1338 |
+
"learning_rate": 2.000637472776049e-06,
|
| 1339 |
+
"loss": 0.20029563903808595,
|
| 1340 |
+
"step": 1900
|
| 1341 |
+
},
|
| 1342 |
+
{
|
| 1343 |
+
"epoch": 0.6112,
|
| 1344 |
+
"grad_norm": 0.593460784828495,
|
| 1345 |
+
"learning_rate": 1.973301825010685e-06,
|
| 1346 |
+
"loss": 0.19462828636169432,
|
| 1347 |
+
"step": 1910
|
| 1348 |
+
},
|
| 1349 |
+
{
|
| 1350 |
+
"epoch": 0.6144,
|
| 1351 |
+
"grad_norm": 0.6796900420369784,
|
| 1352 |
+
"learning_rate": 1.9460319167471934e-06,
|
| 1353 |
+
"loss": 0.20009157657623292,
|
| 1354 |
+
"step": 1920
|
| 1355 |
+
},
|
| 1356 |
+
{
|
| 1357 |
+
"epoch": 0.6176,
|
| 1358 |
+
"grad_norm": 0.5803908647953272,
|
| 1359 |
+
"learning_rate": 1.9188311516620466e-06,
|
| 1360 |
+
"loss": 0.19473812580108643,
|
| 1361 |
+
"step": 1930
|
| 1362 |
+
},
|
| 1363 |
+
{
|
| 1364 |
+
"epoch": 0.6208,
|
| 1365 |
+
"grad_norm": 0.5919196787967083,
|
| 1366 |
+
"learning_rate": 1.891702924801651e-06,
|
| 1367 |
+
"loss": 0.20190510749816895,
|
| 1368 |
+
"step": 1940
|
| 1369 |
+
},
|
| 1370 |
+
{
|
| 1371 |
+
"epoch": 0.624,
|
| 1372 |
+
"grad_norm": 0.6056764566097385,
|
| 1373 |
+
"learning_rate": 1.864650622158604e-06,
|
| 1374 |
+
"loss": 0.2063821792602539,
|
| 1375 |
+
"step": 1950
|
| 1376 |
+
},
|
| 1377 |
+
{
|
| 1378 |
+
"epoch": 0.6272,
|
| 1379 |
+
"grad_norm": 0.5106064574990916,
|
| 1380 |
+
"learning_rate": 1.8376776202490666e-06,
|
| 1381 |
+
"loss": 0.20139360427856445,
|
| 1382 |
+
"step": 1960
|
| 1383 |
+
},
|
| 1384 |
+
{
|
| 1385 |
+
"epoch": 0.6304,
|
| 1386 |
+
"grad_norm": 0.5816570517079882,
|
| 1387 |
+
"learning_rate": 1.8107872856913293e-06,
|
| 1388 |
+
"loss": 0.19568054676055907,
|
| 1389 |
+
"step": 1970
|
| 1390 |
+
},
|
| 1391 |
+
{
|
| 1392 |
+
"epoch": 0.6336,
|
| 1393 |
+
"grad_norm": 0.6100308085295513,
|
| 1394 |
+
"learning_rate": 1.7839829747856096e-06,
|
| 1395 |
+
"loss": 0.19661173820495606,
|
| 1396 |
+
"step": 1980
|
| 1397 |
+
},
|
| 1398 |
+
{
|
| 1399 |
+
"epoch": 0.6368,
|
| 1400 |
+
"grad_norm": 0.6256775545767371,
|
| 1401 |
+
"learning_rate": 1.7572680330951359e-06,
|
| 1402 |
+
"loss": 0.19576869010925294,
|
| 1403 |
+
"step": 1990
|
| 1404 |
+
},
|
| 1405 |
+
{
|
| 1406 |
+
"epoch": 0.64,
|
| 1407 |
+
"grad_norm": 0.5979254874380191,
|
| 1408 |
+
"learning_rate": 1.7306457950285747e-06,
|
| 1409 |
+
"loss": 0.19802470207214357,
|
| 1410 |
+
"step": 2000
|
| 1411 |
+
},
|
| 1412 |
+
{
|
| 1413 |
+
"epoch": 0.6432,
|
| 1414 |
+
"grad_norm": 0.6445065470953916,
|
| 1415 |
+
"learning_rate": 1.704119583423848e-06,
|
| 1416 |
+
"loss": 0.19182772636413575,
|
| 1417 |
+
"step": 2010
|
| 1418 |
+
},
|
| 1419 |
+
{
|
| 1420 |
+
"epoch": 0.6464,
|
| 1421 |
+
"grad_norm": 0.5238518416749739,
|
| 1422 |
+
"learning_rate": 1.677692709133396e-06,
|
| 1423 |
+
"loss": 0.19971816539764403,
|
| 1424 |
+
"step": 2020
|
| 1425 |
+
},
|
| 1426 |
+
{
|
| 1427 |
+
"epoch": 0.6496,
|
| 1428 |
+
"grad_norm": 0.5902086462380663,
|
| 1429 |
+
"learning_rate": 1.6513684706109311e-06,
|
| 1430 |
+
"loss": 0.20058016777038573,
|
| 1431 |
+
"step": 2030
|
| 1432 |
+
},
|
| 1433 |
+
{
|
| 1434 |
+
"epoch": 0.6528,
|
| 1435 |
+
"grad_norm": 0.5301315426540266,
|
| 1436 |
+
"learning_rate": 1.6251501534997529e-06,
|
| 1437 |
+
"loss": 0.19816763401031495,
|
| 1438 |
+
"step": 2040
|
| 1439 |
+
},
|
| 1440 |
+
{
|
| 1441 |
+
"epoch": 0.656,
|
| 1442 |
+
"grad_norm": 0.5702221922649561,
|
| 1443 |
+
"learning_rate": 1.5990410302226405e-06,
|
| 1444 |
+
"loss": 0.19167234897613525,
|
| 1445 |
+
"step": 2050
|
| 1446 |
+
},
|
| 1447 |
+
{
|
| 1448 |
+
"epoch": 0.6592,
|
| 1449 |
+
"grad_norm": 0.5682142108318351,
|
| 1450 |
+
"learning_rate": 1.5730443595734162e-06,
|
| 1451 |
+
"loss": 0.19806729555130004,
|
| 1452 |
+
"step": 2060
|
| 1453 |
+
},
|
| 1454 |
+
{
|
| 1455 |
+
"epoch": 0.6624,
|
| 1456 |
+
"grad_norm": 0.6268750721579749,
|
| 1457 |
+
"learning_rate": 1.5471633863101982e-06,
|
| 1458 |
+
"loss": 0.1990320086479187,
|
| 1459 |
+
"step": 2070
|
| 1460 |
+
},
|
| 1461 |
+
{
|
| 1462 |
+
"epoch": 0.6656,
|
| 1463 |
+
"grad_norm": 0.6501758398050216,
|
| 1464 |
+
"learning_rate": 1.521401340750407e-06,
|
| 1465 |
+
"loss": 0.20063567161560059,
|
| 1466 |
+
"step": 2080
|
| 1467 |
+
},
|
| 1468 |
+
{
|
| 1469 |
+
"epoch": 0.6688,
|
| 1470 |
+
"grad_norm": 0.5367071332530153,
|
| 1471 |
+
"learning_rate": 1.495761438367577e-06,
|
| 1472 |
+
"loss": 0.2000502109527588,
|
| 1473 |
+
"step": 2090
|
| 1474 |
+
},
|
| 1475 |
+
{
|
| 1476 |
+
"epoch": 0.672,
|
| 1477 |
+
"grad_norm": 0.6644202151690211,
|
| 1478 |
+
"learning_rate": 1.4702468793900187e-06,
|
| 1479 |
+
"loss": 0.19811663627624512,
|
| 1480 |
+
"step": 2100
|
| 1481 |
+
},
|
| 1482 |
+
{
|
| 1483 |
+
"epoch": 0.6752,
|
| 1484 |
+
"grad_norm": 0.6020454013039992,
|
| 1485 |
+
"learning_rate": 1.444860848401384e-06,
|
| 1486 |
+
"loss": 0.19873985052108764,
|
| 1487 |
+
"step": 2110
|
| 1488 |
+
},
|
| 1489 |
+
{
|
| 1490 |
+
"epoch": 0.6784,
|
| 1491 |
+
"grad_norm": 0.5672766014696592,
|
| 1492 |
+
"learning_rate": 1.4196065139431866e-06,
|
| 1493 |
+
"loss": 0.19663108587265016,
|
| 1494 |
+
"step": 2120
|
| 1495 |
+
},
|
| 1496 |
+
{
|
| 1497 |
+
"epoch": 0.6816,
|
| 1498 |
+
"grad_norm": 0.6668756559032718,
|
| 1499 |
+
"learning_rate": 1.3944870281193178e-06,
|
| 1500 |
+
"loss": 0.19677751064300536,
|
| 1501 |
+
"step": 2130
|
| 1502 |
+
},
|
| 1503 |
+
{
|
| 1504 |
+
"epoch": 0.6848,
|
| 1505 |
+
"grad_norm": 0.6146850263092741,
|
| 1506 |
+
"learning_rate": 1.3695055262026208e-06,
|
| 1507 |
+
"loss": 0.20252432823181152,
|
| 1508 |
+
"step": 2140
|
| 1509 |
+
},
|
| 1510 |
+
{
|
| 1511 |
+
"epoch": 0.688,
|
| 1512 |
+
"grad_norm": 0.6023134400750195,
|
| 1513 |
+
"learning_rate": 1.3446651262435679e-06,
|
| 1514 |
+
"loss": 0.19564807415008545,
|
| 1515 |
+
"step": 2150
|
| 1516 |
+
},
|
| 1517 |
+
{
|
| 1518 |
+
"epoch": 0.6912,
|
| 1519 |
+
"grad_norm": 0.5973758444267007,
|
| 1520 |
+
"learning_rate": 1.3199689286810746e-06,
|
| 1521 |
+
"loss": 0.19767165184020996,
|
| 1522 |
+
"step": 2160
|
| 1523 |
+
},
|
| 1524 |
+
{
|
| 1525 |
+
"epoch": 0.6944,
|
| 1526 |
+
"grad_norm": 0.604085220565822,
|
| 1527 |
+
"learning_rate": 1.2954200159555294e-06,
|
| 1528 |
+
"loss": 0.19245314598083496,
|
| 1529 |
+
"step": 2170
|
| 1530 |
+
},
|
| 1531 |
+
{
|
| 1532 |
+
"epoch": 0.6976,
|
| 1533 |
+
"grad_norm": 0.5971658440027723,
|
| 1534 |
+
"learning_rate": 1.2710214521240527e-06,
|
| 1535 |
+
"loss": 0.19593756198883056,
|
| 1536 |
+
"step": 2180
|
| 1537 |
+
},
|
| 1538 |
+
{
|
| 1539 |
+
"epoch": 0.7008,
|
| 1540 |
+
"grad_norm": 0.6712656742168871,
|
| 1541 |
+
"learning_rate": 1.246776282478063e-06,
|
| 1542 |
+
"loss": 0.19848381280899047,
|
| 1543 |
+
"step": 2190
|
| 1544 |
+
},
|
| 1545 |
+
{
|
| 1546 |
+
"epoch": 0.704,
|
| 1547 |
+
"grad_norm": 0.5303502593262494,
|
| 1548 |
+
"learning_rate": 1.222687533163181e-06,
|
| 1549 |
+
"loss": 0.19739968776702882,
|
| 1550 |
+
"step": 2200
|
| 1551 |
+
},
|
| 1552 |
+
{
|
| 1553 |
+
"epoch": 0.7072,
|
| 1554 |
+
"grad_norm": 0.6329890536946617,
|
| 1555 |
+
"learning_rate": 1.1987582108015228e-06,
|
| 1556 |
+
"loss": 0.19885218143463135,
|
| 1557 |
+
"step": 2210
|
| 1558 |
+
},
|
| 1559 |
+
{
|
| 1560 |
+
"epoch": 0.7104,
|
| 1561 |
+
"grad_norm": 0.6175733280769058,
|
| 1562 |
+
"learning_rate": 1.1749913021164255e-06,
|
| 1563 |
+
"loss": 0.20003676414489746,
|
| 1564 |
+
"step": 2220
|
| 1565 |
+
},
|
| 1566 |
+
{
|
| 1567 |
+
"epoch": 0.7136,
|
| 1568 |
+
"grad_norm": 0.6297338992517326,
|
| 1569 |
+
"learning_rate": 1.1513897735596702e-06,
|
| 1570 |
+
"loss": 0.19420522451400757,
|
| 1571 |
+
"step": 2230
|
| 1572 |
+
},
|
| 1573 |
+
{
|
| 1574 |
+
"epoch": 0.7168,
|
| 1575 |
+
"grad_norm": 0.5570261846558745,
|
| 1576 |
+
"learning_rate": 1.127956570941218e-06,
|
| 1577 |
+
"loss": 0.19144604206085206,
|
| 1578 |
+
"step": 2240
|
| 1579 |
+
},
|
| 1580 |
+
{
|
| 1581 |
+
"epoch": 0.72,
|
| 1582 |
+
"grad_norm": 0.7464999016757174,
|
| 1583 |
+
"learning_rate": 1.104694619061533e-06,
|
| 1584 |
+
"loss": 0.20028018951416016,
|
| 1585 |
+
"step": 2250
|
| 1586 |
+
},
|
| 1587 |
+
{
|
| 1588 |
+
"epoch": 0.7232,
|
| 1589 |
+
"grad_norm": 0.5813509472785208,
|
| 1590 |
+
"learning_rate": 1.0816068213465295e-06,
|
| 1591 |
+
"loss": 0.2022254228591919,
|
| 1592 |
+
"step": 2260
|
| 1593 |
+
},
|
| 1594 |
+
{
|
| 1595 |
+
"epoch": 0.7264,
|
| 1596 |
+
"grad_norm": 0.5788680063085246,
|
| 1597 |
+
"learning_rate": 1.0586960594851762e-06,
|
| 1598 |
+
"loss": 0.19734264612197877,
|
| 1599 |
+
"step": 2270
|
| 1600 |
+
},
|
| 1601 |
+
{
|
| 1602 |
+
"epoch": 0.7296,
|
| 1603 |
+
"grad_norm": 0.6879904092074834,
|
| 1604 |
+
"learning_rate": 1.0359651930698217e-06,
|
| 1605 |
+
"loss": 0.19566457271575927,
|
| 1606 |
+
"step": 2280
|
| 1607 |
+
},
|
| 1608 |
+
{
|
| 1609 |
+
"epoch": 0.7328,
|
| 1610 |
+
"grad_norm": 0.545714278159425,
|
| 1611 |
+
"learning_rate": 1.0134170592392837e-06,
|
| 1612 |
+
"loss": 0.19808268547058105,
|
| 1613 |
+
"step": 2290
|
| 1614 |
+
},
|
| 1615 |
+
{
|
| 1616 |
+
"epoch": 0.736,
|
| 1617 |
+
"grad_norm": 0.6957466724150051,
|
| 1618 |
+
"learning_rate": 9.910544723247204e-07,
|
| 1619 |
+
"loss": 0.19703471660614014,
|
| 1620 |
+
"step": 2300
|
| 1621 |
+
},
|
| 1622 |
+
{
|
| 1623 |
+
"epoch": 0.7392,
|
| 1624 |
+
"grad_norm": 0.5722555379171206,
|
| 1625 |
+
"learning_rate": 9.688802234983706e-07,
|
| 1626 |
+
"loss": 0.19638856649398803,
|
| 1627 |
+
"step": 2310
|
| 1628 |
+
},
|
| 1629 |
+
{
|
| 1630 |
+
"epoch": 0.7424,
|
| 1631 |
+
"grad_norm": 0.6657445816108672,
|
| 1632 |
+
"learning_rate": 9.468970804251742e-07,
|
| 1633 |
+
"loss": 0.1994560480117798,
|
| 1634 |
+
"step": 2320
|
| 1635 |
+
},
|
| 1636 |
+
{
|
| 1637 |
+
"epoch": 0.7456,
|
| 1638 |
+
"grad_norm": 0.6118638240003964,
|
| 1639 |
+
"learning_rate": 9.251077869173244e-07,
|
| 1640 |
+
"loss": 0.19247424602508545,
|
| 1641 |
+
"step": 2330
|
| 1642 |
+
},
|
| 1643 |
+
{
|
| 1644 |
+
"epoch": 0.7488,
|
| 1645 |
+
"grad_norm": 0.618262759129052,
|
| 1646 |
+
"learning_rate": 9.035150625918054e-07,
|
| 1647 |
+
"loss": 0.19384448528289794,
|
| 1648 |
+
"step": 2340
|
| 1649 |
+
},
|
| 1650 |
+
{
|
| 1651 |
+
"epoch": 0.752,
|
| 1652 |
+
"grad_norm": 0.5841167908088344,
|
| 1653 |
+
"learning_rate": 8.821216025309395e-07,
|
| 1654 |
+
"loss": 0.19670048952102662,
|
| 1655 |
+
"step": 2350
|
| 1656 |
+
},
|
| 1657 |
+
{
|
| 1658 |
+
"epoch": 0.7552,
|
| 1659 |
+
"grad_norm": 0.6330443090953268,
|
| 1660 |
+
"learning_rate": 8.609300769460055e-07,
|
| 1661 |
+
"loss": 0.191538667678833,
|
| 1662 |
+
"step": 2360
|
| 1663 |
+
},
|
| 1664 |
+
{
|
| 1665 |
+
"epoch": 0.7584,
|
| 1666 |
+
"grad_norm": 0.6922248169402944,
|
| 1667 |
+
"learning_rate": 8.399431308439592e-07,
|
| 1668 |
+
"loss": 0.19869886636734008,
|
| 1669 |
+
"step": 2370
|
| 1670 |
+
},
|
| 1671 |
+
{
|
| 1672 |
+
"epoch": 0.7616,
|
| 1673 |
+
"grad_norm": 0.5821907331028691,
|
| 1674 |
+
"learning_rate": 8.191633836972962e-07,
|
| 1675 |
+
"loss": 0.19837281703948975,
|
| 1676 |
+
"step": 2380
|
| 1677 |
+
},
|
| 1678 |
+
{
|
| 1679 |
+
"epoch": 0.7648,
|
| 1680 |
+
"grad_norm": 0.5484553164447705,
|
| 1681 |
+
"learning_rate": 7.985934291171024e-07,
|
| 1682 |
+
"loss": 0.19366707801818847,
|
| 1683 |
+
"step": 2390
|
| 1684 |
+
},
|
| 1685 |
+
{
|
| 1686 |
+
"epoch": 0.768,
|
| 1687 |
+
"grad_norm": 0.6131324978552078,
|
| 1688 |
+
"learning_rate": 7.7823583452934e-07,
|
| 1689 |
+
"loss": 0.19763607978820802,
|
| 1690 |
+
"step": 2400
|
| 1691 |
+
},
|
| 1692 |
+
{
|
| 1693 |
+
"epoch": 0.7712,
|
| 1694 |
+
"grad_norm": 0.5665386766642198,
|
| 1695 |
+
"learning_rate": 7.58093140854389e-07,
|
| 1696 |
+
"loss": 0.19747262001037597,
|
| 1697 |
+
"step": 2410
|
| 1698 |
+
},
|
| 1699 |
+
{
|
| 1700 |
+
"epoch": 0.7744,
|
| 1701 |
+
"grad_norm": 0.6702088035794936,
|
| 1702 |
+
"learning_rate": 7.381678621899077e-07,
|
| 1703 |
+
"loss": 0.19848825931549072,
|
| 1704 |
+
"step": 2420
|
| 1705 |
+
},
|
| 1706 |
+
{
|
| 1707 |
+
"epoch": 0.7776,
|
| 1708 |
+
"grad_norm": 0.6808200224599221,
|
| 1709 |
+
"learning_rate": 7.184624854970379e-07,
|
| 1710 |
+
"loss": 0.19454023838043213,
|
| 1711 |
+
"step": 2430
|
| 1712 |
+
},
|
| 1713 |
+
{
|
| 1714 |
+
"epoch": 0.7808,
|
| 1715 |
+
"grad_norm": 0.5446840545845119,
|
| 1716 |
+
"learning_rate": 6.989794702899932e-07,
|
| 1717 |
+
"loss": 0.1943270444869995,
|
| 1718 |
+
"step": 2440
|
| 1719 |
+
},
|
| 1720 |
+
{
|
| 1721 |
+
"epoch": 0.784,
|
| 1722 |
+
"grad_norm": 0.6415010178339859,
|
| 1723 |
+
"learning_rate": 6.797212483290777e-07,
|
| 1724 |
+
"loss": 0.19584910869598388,
|
| 1725 |
+
"step": 2450
|
| 1726 |
+
},
|
| 1727 |
+
{
|
| 1728 |
+
"epoch": 0.7872,
|
| 1729 |
+
"grad_norm": 0.603526871568268,
|
| 1730 |
+
"learning_rate": 6.60690223317171e-07,
|
| 1731 |
+
"loss": 0.19342836141586303,
|
| 1732 |
+
"step": 2460
|
| 1733 |
+
},
|
| 1734 |
+
{
|
| 1735 |
+
"epoch": 0.7904,
|
| 1736 |
+
"grad_norm": 0.5817111419419255,
|
| 1737 |
+
"learning_rate": 6.418887705997046e-07,
|
| 1738 |
+
"loss": 0.19574793577194213,
|
| 1739 |
+
"step": 2470
|
| 1740 |
+
},
|
| 1741 |
+
{
|
| 1742 |
+
"epoch": 0.7936,
|
| 1743 |
+
"grad_norm": 0.7792382444355755,
|
| 1744 |
+
"learning_rate": 6.23319236868189e-07,
|
| 1745 |
+
"loss": 0.1987607717514038,
|
| 1746 |
+
"step": 2480
|
| 1747 |
+
},
|
| 1748 |
+
{
|
| 1749 |
+
"epoch": 0.7968,
|
| 1750 |
+
"grad_norm": 0.6291788716222239,
|
| 1751 |
+
"learning_rate": 6.049839398673141e-07,
|
| 1752 |
+
"loss": 0.20009655952453614,
|
| 1753 |
+
"step": 2490
|
| 1754 |
+
},
|
| 1755 |
+
{
|
| 1756 |
+
"epoch": 0.8,
|
| 1757 |
+
"grad_norm": 0.674170182636883,
|
| 1758 |
+
"learning_rate": 5.868851681056567e-07,
|
| 1759 |
+
"loss": 0.2016763210296631,
|
| 1760 |
+
"step": 2500
|
| 1761 |
+
},
|
| 1762 |
+
{
|
| 1763 |
+
"epoch": 0.8032,
|
| 1764 |
+
"grad_norm": 0.5738700746068163,
|
| 1765 |
+
"learning_rate": 5.690251805700467e-07,
|
| 1766 |
+
"loss": 0.19858623743057252,
|
| 1767 |
+
"step": 2510
|
| 1768 |
+
},
|
| 1769 |
+
{
|
| 1770 |
+
"epoch": 0.8064,
|
| 1771 |
+
"grad_norm": 0.5748267344102337,
|
| 1772 |
+
"learning_rate": 5.514062064436096e-07,
|
| 1773 |
+
"loss": 0.19959205389022827,
|
| 1774 |
+
"step": 2520
|
| 1775 |
+
},
|
| 1776 |
+
{
|
| 1777 |
+
"epoch": 0.8096,
|
| 1778 |
+
"grad_norm": 0.6464282974919533,
|
| 1779 |
+
"learning_rate": 5.34030444827533e-07,
|
| 1780 |
+
"loss": 0.19621236324310304,
|
| 1781 |
+
"step": 2530
|
| 1782 |
+
},
|
| 1783 |
+
{
|
| 1784 |
+
"epoch": 0.8128,
|
| 1785 |
+
"grad_norm": 0.6390320405050175,
|
| 1786 |
+
"learning_rate": 5.169000644665895e-07,
|
| 1787 |
+
"loss": 0.19293551445007323,
|
| 1788 |
+
"step": 2540
|
| 1789 |
+
},
|
| 1790 |
+
{
|
| 1791 |
+
"epoch": 0.816,
|
| 1792 |
+
"grad_norm": 0.5856228193289068,
|
| 1793 |
+
"learning_rate": 5.000172034784442e-07,
|
| 1794 |
+
"loss": 0.1952167272567749,
|
| 1795 |
+
"step": 2550
|
| 1796 |
+
},
|
| 1797 |
+
{
|
| 1798 |
+
"epoch": 0.8192,
|
| 1799 |
+
"grad_norm": 0.6152721851543074,
|
| 1800 |
+
"learning_rate": 4.833839690867853e-07,
|
| 1801 |
+
"loss": 0.19755464792251587,
|
| 1802 |
+
"step": 2560
|
| 1803 |
+
},
|
| 1804 |
+
{
|
| 1805 |
+
"epoch": 0.8224,
|
| 1806 |
+
"grad_norm": 0.6792777707129383,
|
| 1807 |
+
"learning_rate": 4.6700243735831705e-07,
|
| 1808 |
+
"loss": 0.1906466007232666,
|
| 1809 |
+
"step": 2570
|
| 1810 |
+
},
|
| 1811 |
+
{
|
| 1812 |
+
"epoch": 0.8256,
|
| 1813 |
+
"grad_norm": 0.5650779115466599,
|
| 1814 |
+
"learning_rate": 4.508746529436311e-07,
|
| 1815 |
+
"loss": 0.1896218776702881,
|
| 1816 |
+
"step": 2580
|
| 1817 |
+
},
|
| 1818 |
+
{
|
| 1819 |
+
"epoch": 0.8288,
|
| 1820 |
+
"grad_norm": 0.6068556104605155,
|
| 1821 |
+
"learning_rate": 4.350026288220083e-07,
|
| 1822 |
+
"loss": 0.1972370147705078,
|
| 1823 |
+
"step": 2590
|
| 1824 |
+
},
|
| 1825 |
+
{
|
| 1826 |
+
"epoch": 0.832,
|
| 1827 |
+
"grad_norm": 0.6087844635927864,
|
| 1828 |
+
"learning_rate": 4.1938834605017133e-07,
|
| 1829 |
+
"loss": 0.19401493072509765,
|
| 1830 |
+
"step": 2600
|
| 1831 |
+
},
|
| 1832 |
+
{
|
| 1833 |
+
"epoch": 0.8352,
|
| 1834 |
+
"grad_norm": 0.594443863161453,
|
| 1835 |
+
"learning_rate": 4.0403375351501515e-07,
|
| 1836 |
+
"loss": 0.19397275447845458,
|
| 1837 |
+
"step": 2610
|
| 1838 |
+
},
|
| 1839 |
+
{
|
| 1840 |
+
"epoch": 0.8384,
|
| 1841 |
+
"grad_norm": 0.5777613928889838,
|
| 1842 |
+
"learning_rate": 3.88940767690362e-07,
|
| 1843 |
+
"loss": 0.19363962411880492,
|
| 1844 |
+
"step": 2620
|
| 1845 |
+
},
|
| 1846 |
+
{
|
| 1847 |
+
"epoch": 0.8416,
|
| 1848 |
+
"grad_norm": 0.6122408540819826,
|
| 1849 |
+
"learning_rate": 3.7411127239775774e-07,
|
| 1850 |
+
"loss": 0.19224631786346436,
|
| 1851 |
+
"step": 2630
|
| 1852 |
+
},
|
| 1853 |
+
{
|
| 1854 |
+
"epoch": 0.8448,
|
| 1855 |
+
"grad_norm": 0.5922115547592817,
|
| 1856 |
+
"learning_rate": 3.595471185713431e-07,
|
| 1857 |
+
"loss": 0.19027912616729736,
|
| 1858 |
+
"step": 2640
|
| 1859 |
+
},
|
| 1860 |
+
{
|
| 1861 |
+
"epoch": 0.848,
|
| 1862 |
+
"grad_norm": 0.6012010067551694,
|
| 1863 |
+
"learning_rate": 3.4525012402682826e-07,
|
| 1864 |
+
"loss": 0.1921192765235901,
|
| 1865 |
+
"step": 2650
|
| 1866 |
+
},
|
| 1867 |
+
{
|
| 1868 |
+
"epoch": 0.8512,
|
| 1869 |
+
"grad_norm": 0.6089446682050474,
|
| 1870 |
+
"learning_rate": 3.3122207323460804e-07,
|
| 1871 |
+
"loss": 0.19460537433624267,
|
| 1872 |
+
"step": 2660
|
| 1873 |
+
},
|
| 1874 |
+
{
|
| 1875 |
+
"epoch": 0.8544,
|
| 1876 |
+
"grad_norm": 0.6314431181993275,
|
| 1877 |
+
"learning_rate": 3.1746471709702963e-07,
|
| 1878 |
+
"loss": 0.19075865745544435,
|
| 1879 |
+
"step": 2670
|
| 1880 |
+
},
|
| 1881 |
+
{
|
| 1882 |
+
"epoch": 0.8576,
|
| 1883 |
+
"grad_norm": 0.6136529603147252,
|
| 1884 |
+
"learning_rate": 3.039797727298585e-07,
|
| 1885 |
+
"loss": 0.1973212718963623,
|
| 1886 |
+
"step": 2680
|
| 1887 |
+
},
|
| 1888 |
+
{
|
| 1889 |
+
"epoch": 0.8608,
|
| 1890 |
+
"grad_norm": 0.6278068265217286,
|
| 1891 |
+
"learning_rate": 2.9076892324795546e-07,
|
| 1892 |
+
"loss": 0.19564627408981322,
|
| 1893 |
+
"step": 2690
|
| 1894 |
+
},
|
| 1895 |
+
{
|
| 1896 |
+
"epoch": 0.864,
|
| 1897 |
+
"grad_norm": 0.6308491327804164,
|
| 1898 |
+
"learning_rate": 2.778338175551995e-07,
|
| 1899 |
+
"loss": 0.19089040756225586,
|
| 1900 |
+
"step": 2700
|
| 1901 |
+
},
|
| 1902 |
+
{
|
| 1903 |
+
"epoch": 0.8672,
|
| 1904 |
+
"grad_norm": 0.6806226474068601,
|
| 1905 |
+
"learning_rate": 2.6517607013868326e-07,
|
| 1906 |
+
"loss": 0.19906394481658934,
|
| 1907 |
+
"step": 2710
|
| 1908 |
+
},
|
| 1909 |
+
{
|
| 1910 |
+
"epoch": 0.8704,
|
| 1911 |
+
"grad_norm": 0.6497216896329614,
|
| 1912 |
+
"learning_rate": 2.527972608672002e-07,
|
| 1913 |
+
"loss": 0.19420729875564574,
|
| 1914 |
+
"step": 2720
|
| 1915 |
+
},
|
| 1916 |
+
{
|
| 1917 |
+
"epoch": 0.8736,
|
| 1918 |
+
"grad_norm": 0.5988037888796804,
|
| 1919 |
+
"learning_rate": 2.40698934794053e-07,
|
| 1920 |
+
"loss": 0.1949334740638733,
|
| 1921 |
+
"step": 2730
|
| 1922 |
+
},
|
| 1923 |
+
{
|
| 1924 |
+
"epoch": 0.8768,
|
| 1925 |
+
"grad_norm": 0.5825410688543936,
|
| 1926 |
+
"learning_rate": 2.2888260196421237e-07,
|
| 1927 |
+
"loss": 0.19373006820678712,
|
| 1928 |
+
"step": 2740
|
| 1929 |
+
},
|
| 1930 |
+
{
|
| 1931 |
+
"epoch": 0.88,
|
| 1932 |
+
"grad_norm": 0.5659393573725252,
|
| 1933 |
+
"learning_rate": 2.1734973722583735e-07,
|
| 1934 |
+
"loss": 0.19743962287902833,
|
| 1935 |
+
"step": 2750
|
| 1936 |
+
},
|
| 1937 |
+
{
|
| 1938 |
+
"epoch": 0.8832,
|
| 1939 |
+
"grad_norm": 0.6810045862821603,
|
| 1940 |
+
"learning_rate": 2.0610178004619564e-07,
|
| 1941 |
+
"loss": 0.18792747259140014,
|
| 1942 |
+
"step": 2760
|
| 1943 |
+
},
|
| 1944 |
+
{
|
| 1945 |
+
"epoch": 0.8864,
|
| 1946 |
+
"grad_norm": 0.5624807528399969,
|
| 1947 |
+
"learning_rate": 1.9514013433199834e-07,
|
| 1948 |
+
"loss": 0.20065484046936036,
|
| 1949 |
+
"step": 2770
|
| 1950 |
+
},
|
| 1951 |
+
{
|
| 1952 |
+
"epoch": 0.8896,
|
| 1953 |
+
"grad_norm": 0.5300049949985157,
|
| 1954 |
+
"learning_rate": 1.8446616825416958e-07,
|
| 1955 |
+
"loss": 0.19963890314102173,
|
| 1956 |
+
"step": 2780
|
| 1957 |
+
},
|
| 1958 |
+
{
|
| 1959 |
+
"epoch": 0.8928,
|
| 1960 |
+
"grad_norm": 0.6417643354263414,
|
| 1961 |
+
"learning_rate": 1.7408121407708007e-07,
|
| 1962 |
+
"loss": 0.19946534633636476,
|
| 1963 |
+
"step": 2790
|
| 1964 |
+
},
|
| 1965 |
+
{
|
| 1966 |
+
"epoch": 0.896,
|
| 1967 |
+
"grad_norm": 0.6263783317633913,
|
| 1968 |
+
"learning_rate": 1.6398656799226253e-07,
|
| 1969 |
+
"loss": 0.1873138427734375,
|
| 1970 |
+
"step": 2800
|
| 1971 |
+
},
|
| 1972 |
+
{
|
| 1973 |
+
"epoch": 0.8992,
|
| 1974 |
+
"grad_norm": 0.6642472444356609,
|
| 1975 |
+
"learning_rate": 1.5418348995662773e-07,
|
| 1976 |
+
"loss": 0.1936098575592041,
|
| 1977 |
+
"step": 2810
|
| 1978 |
+
},
|
| 1979 |
+
{
|
| 1980 |
+
"epoch": 0.9024,
|
| 1981 |
+
"grad_norm": 0.6361104958877116,
|
| 1982 |
+
"learning_rate": 1.4467320353520275e-07,
|
| 1983 |
+
"loss": 0.192909574508667,
|
| 1984 |
+
"step": 2820
|
| 1985 |
+
},
|
| 1986 |
+
{
|
| 1987 |
+
"epoch": 0.9056,
|
| 1988 |
+
"grad_norm": 0.606401356191172,
|
| 1989 |
+
"learning_rate": 1.3545689574841341e-07,
|
| 1990 |
+
"loss": 0.1932598114013672,
|
| 1991 |
+
"step": 2830
|
| 1992 |
+
},
|
| 1993 |
+
{
|
| 1994 |
+
"epoch": 0.9088,
|
| 1995 |
+
"grad_norm": 0.6138805257535019,
|
| 1996 |
+
"learning_rate": 1.26535716923927e-07,
|
| 1997 |
+
"loss": 0.19897468090057374,
|
| 1998 |
+
"step": 2840
|
| 1999 |
+
},
|
| 2000 |
+
{
|
| 2001 |
+
"epoch": 0.912,
|
| 2002 |
+
"grad_norm": 0.6113791423952993,
|
| 2003 |
+
"learning_rate": 1.1791078055307493e-07,
|
| 2004 |
+
"loss": 0.19516528844833375,
|
| 2005 |
+
"step": 2850
|
| 2006 |
+
},
|
| 2007 |
+
{
|
| 2008 |
+
"epoch": 0.9152,
|
| 2009 |
+
"grad_norm": 0.5897316619244026,
|
| 2010 |
+
"learning_rate": 1.0958316315187289e-07,
|
| 2011 |
+
"loss": 0.1947079300880432,
|
| 2012 |
+
"step": 2860
|
| 2013 |
+
},
|
| 2014 |
+
{
|
| 2015 |
+
"epoch": 0.9184,
|
| 2016 |
+
"grad_norm": 0.6570448249633108,
|
| 2017 |
+
"learning_rate": 1.0155390412665528e-07,
|
| 2018 |
+
"loss": 0.19286593198776245,
|
| 2019 |
+
"step": 2870
|
| 2020 |
+
},
|
| 2021 |
+
{
|
| 2022 |
+
"epoch": 0.9216,
|
| 2023 |
+
"grad_norm": 0.6143543897264965,
|
| 2024 |
+
"learning_rate": 9.38240056443443e-08,
|
| 2025 |
+
"loss": 0.18985612392425538,
|
| 2026 |
+
"step": 2880
|
| 2027 |
+
},
|
| 2028 |
+
{
|
| 2029 |
+
"epoch": 0.9248,
|
| 2030 |
+
"grad_norm": 0.6208574768565508,
|
| 2031 |
+
"learning_rate": 8.639443250736402e-08,
|
| 2032 |
+
"loss": 0.1930636167526245,
|
| 2033 |
+
"step": 2890
|
| 2034 |
+
},
|
| 2035 |
+
{
|
| 2036 |
+
"epoch": 0.928,
|
| 2037 |
+
"grad_norm": 0.6380337536968056,
|
| 2038 |
+
"learning_rate": 7.926611203321777e-08,
|
| 2039 |
+
"loss": 0.1940324306488037,
|
| 2040 |
+
"step": 2900
|
| 2041 |
+
},
|
| 2042 |
+
{
|
| 2043 |
+
"epoch": 0.9312,
|
| 2044 |
+
"grad_norm": 0.6333119199427104,
|
| 2045 |
+
"learning_rate": 7.243993393874882e-08,
|
| 2046 |
+
"loss": 0.195207679271698,
|
| 2047 |
+
"step": 2910
|
| 2048 |
+
},
|
| 2049 |
+
{
|
| 2050 |
+
"epoch": 0.9344,
|
| 2051 |
+
"grad_norm": 0.5601684784399228,
|
| 2052 |
+
"learning_rate": 6.591675022908805e-08,
|
| 2053 |
+
"loss": 0.1926344394683838,
|
| 2054 |
+
"step": 2920
|
| 2055 |
+
},
|
| 2056 |
+
{
|
| 2057 |
+
"epoch": 0.9376,
|
| 2058 |
+
"grad_norm": 0.7001254632467586,
|
| 2059 |
+
"learning_rate": 5.969737509131241e-08,
|
| 2060 |
+
"loss": 0.189910888671875,
|
| 2061 |
+
"step": 2930
|
| 2062 |
+
},
|
| 2063 |
+
{
|
| 2064 |
+
"epoch": 0.9408,
|
| 2065 |
+
"grad_norm": 0.5707165379372983,
|
| 2066 |
+
"learning_rate": 5.3782584792823334e-08,
|
| 2067 |
+
"loss": 0.1941395878791809,
|
| 2068 |
+
"step": 2940
|
| 2069 |
+
},
|
| 2070 |
+
{
|
| 2071 |
+
"epoch": 0.944,
|
| 2072 |
+
"grad_norm": 0.637882100753534,
|
| 2073 |
+
"learning_rate": 4.817311758445686e-08,
|
| 2074 |
+
"loss": 0.19586544036865233,
|
| 2075 |
+
"step": 2950
|
| 2076 |
+
},
|
| 2077 |
+
{
|
| 2078 |
+
"epoch": 0.9472,
|
| 2079 |
+
"grad_norm": 0.58305847153215,
|
| 2080 |
+
"learning_rate": 4.286967360833866e-08,
|
| 2081 |
+
"loss": 0.19621498584747316,
|
| 2082 |
+
"step": 2960
|
| 2083 |
+
},
|
| 2084 |
+
{
|
| 2085 |
+
"epoch": 0.9504,
|
| 2086 |
+
"grad_norm": 0.6444124781946634,
|
| 2087 |
+
"learning_rate": 3.787291481049754e-08,
|
| 2088 |
+
"loss": 0.19597216844558715,
|
| 2089 |
+
"step": 2970
|
| 2090 |
+
},
|
| 2091 |
+
{
|
| 2092 |
+
"epoch": 0.9536,
|
| 2093 |
+
"grad_norm": 0.68778482150424,
|
| 2094 |
+
"learning_rate": 3.3183464858244364e-08,
|
| 2095 |
+
"loss": 0.20229551792144776,
|
| 2096 |
+
"step": 2980
|
| 2097 |
+
},
|
| 2098 |
+
{
|
| 2099 |
+
"epoch": 0.9568,
|
| 2100 |
+
"grad_norm": 0.589065287919965,
|
| 2101 |
+
"learning_rate": 2.8801909062328992e-08,
|
| 2102 |
+
"loss": 0.1879359722137451,
|
| 2103 |
+
"step": 2990
|
| 2104 |
+
},
|
| 2105 |
+
{
|
| 2106 |
+
"epoch": 0.96,
|
| 2107 |
+
"grad_norm": 0.7200708770444023,
|
| 2108 |
+
"learning_rate": 2.4728794303886248e-08,
|
| 2109 |
+
"loss": 0.18806444406509398,
|
| 2110 |
+
"step": 3000
|
| 2111 |
+
},
|
| 2112 |
+
{
|
| 2113 |
+
"epoch": 0.9632,
|
| 2114 |
+
"grad_norm": 0.6369212243333968,
|
| 2115 |
+
"learning_rate": 2.0964628966175794e-08,
|
| 2116 |
+
"loss": 0.19293060302734374,
|
| 2117 |
+
"step": 3010
|
| 2118 |
+
},
|
| 2119 |
+
{
|
| 2120 |
+
"epoch": 0.9664,
|
| 2121 |
+
"grad_norm": 0.6150129328436796,
|
| 2122 |
+
"learning_rate": 1.750988287113009e-08,
|
| 2123 |
+
"loss": 0.19189660549163817,
|
| 2124 |
+
"step": 3020
|
| 2125 |
+
},
|
| 2126 |
+
{
|
| 2127 |
+
"epoch": 0.9696,
|
| 2128 |
+
"grad_norm": 0.5966036549992078,
|
| 2129 |
+
"learning_rate": 1.4364987220713278e-08,
|
| 2130 |
+
"loss": 0.1992994427680969,
|
| 2131 |
+
"step": 3030
|
| 2132 |
+
},
|
| 2133 |
+
{
|
| 2134 |
+
"epoch": 0.9728,
|
| 2135 |
+
"grad_norm": 0.6785615385564472,
|
| 2136 |
+
"learning_rate": 1.1530334543099763e-08,
|
| 2137 |
+
"loss": 0.19624128341674804,
|
| 2138 |
+
"step": 3040
|
| 2139 |
+
},
|
| 2140 |
+
{
|
| 2141 |
+
"epoch": 0.976,
|
| 2142 |
+
"grad_norm": 0.626236262460755,
|
| 2143 |
+
"learning_rate": 9.006278643683697e-09,
|
| 2144 |
+
"loss": 0.19942662715911866,
|
| 2145 |
+
"step": 3050
|
| 2146 |
+
},
|
| 2147 |
+
{
|
| 2148 |
+
"epoch": 0.9792,
|
| 2149 |
+
"grad_norm": 0.71228117768398,
|
| 2150 |
+
"learning_rate": 6.793134560916514e-09,
|
| 2151 |
+
"loss": 0.2007957935333252,
|
| 2152 |
+
"step": 3060
|
| 2153 |
+
},
|
| 2154 |
+
{
|
| 2155 |
+
"epoch": 0.9824,
|
| 2156 |
+
"grad_norm": 0.5740813965788273,
|
| 2157 |
+
"learning_rate": 4.891178526986451e-09,
|
| 2158 |
+
"loss": 0.19730459451675414,
|
| 2159 |
+
"step": 3070
|
| 2160 |
+
},
|
| 2161 |
+
{
|
| 2162 |
+
"epoch": 0.9856,
|
| 2163 |
+
"grad_norm": 0.6522249776214731,
|
| 2164 |
+
"learning_rate": 3.3006479333413943e-09,
|
| 2165 |
+
"loss": 0.1995969295501709,
|
| 2166 |
+
"step": 3080
|
| 2167 |
+
},
|
| 2168 |
+
{
|
| 2169 |
+
"epoch": 0.9888,
|
| 2170 |
+
"grad_norm": 0.6484316026206892,
|
| 2171 |
+
"learning_rate": 2.021741301058422e-09,
|
| 2172 |
+
"loss": 0.19556543827056885,
|
| 2173 |
+
"step": 3090
|
| 2174 |
+
},
|
| 2175 |
+
{
|
| 2176 |
+
"epoch": 0.992,
|
| 2177 |
+
"grad_norm": 0.6355663767406068,
|
| 2178 |
+
"learning_rate": 1.0546182560652872e-09,
|
| 2179 |
+
"loss": 0.19732578992843627,
|
| 2180 |
+
"step": 3100
|
| 2181 |
+
},
|
| 2182 |
+
{
|
| 2183 |
+
"epoch": 0.9952,
|
| 2184 |
+
"grad_norm": 0.6169267731488666,
|
| 2185 |
+
"learning_rate": 3.9939950921774607e-10,
|
| 2186 |
+
"loss": 0.1917206883430481,
|
| 2187 |
+
"step": 3110
|
| 2188 |
+
},
|
| 2189 |
+
{
|
| 2190 |
+
"epoch": 0.9984,
|
| 2191 |
+
"grad_norm": 0.5994063111681457,
|
| 2192 |
+
"learning_rate": 5.616684123160854e-11,
|
| 2193 |
+
"loss": 0.1916499137878418,
|
| 2194 |
+
"step": 3120
|
| 2195 |
+
},
|
| 2196 |
+
{
|
| 2197 |
+
"epoch": 1.0,
|
| 2198 |
+
"step": 3125,
|
| 2199 |
+
"total_flos": 1201860236279808.0,
|
| 2200 |
+
"train_loss": 0.2128027264213562,
|
| 2201 |
+
"train_runtime": 15463.9635,
|
| 2202 |
+
"train_samples_per_second": 12.933,
|
| 2203 |
+
"train_steps_per_second": 0.202
|
| 2204 |
+
}
|
| 2205 |
+
],
|
| 2206 |
+
"logging_steps": 10,
|
| 2207 |
+
"max_steps": 3125,
|
| 2208 |
+
"num_input_tokens_seen": 0,
|
| 2209 |
+
"num_train_epochs": 1,
|
| 2210 |
+
"save_steps": 500,
|
| 2211 |
+
"stateful_callbacks": {
|
| 2212 |
+
"TrainerControl": {
|
| 2213 |
+
"args": {
|
| 2214 |
+
"should_epoch_stop": false,
|
| 2215 |
+
"should_evaluate": false,
|
| 2216 |
+
"should_log": false,
|
| 2217 |
+
"should_save": true,
|
| 2218 |
+
"should_training_stop": true
|
| 2219 |
+
},
|
| 2220 |
+
"attributes": {}
|
| 2221 |
+
}
|
| 2222 |
+
},
|
| 2223 |
+
"total_flos": 1201860236279808.0,
|
| 2224 |
+
"train_batch_size": 8,
|
| 2225 |
+
"trial_name": null,
|
| 2226 |
+
"trial_params": null
|
| 2227 |
+
}
|
checkpoints/Qwen3-VL-2B-SFT/training_loss.png
ADDED
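The added loss plot can presumably be regenerated from the trainer_state.json in the same checkpoint directory, and the summary block there is internally consistent: 15,463.96 s at 12.933 samples/s is roughly 200,000 samples over 3,125 steps, i.e. an effective batch size of 64 (with `train_batch_size` 8 per device, this suggests 8-way data parallelism or gradient accumulation, which the state file does not record). Below is a minimal sketch, not part of the PR, assuming the standard Hugging Face Trainer layout where the logged entries sit under a `log_history` key; the paths and figure styling are illustrative only.

```python
# Sketch: rebuild a loss curve like training_loss.png from trainer_state.json.
# Assumes the standard Trainer layout ("log_history" array); paths illustrative.
import json

import matplotlib.pyplot as plt

with open("checkpoints/Qwen3-VL-2B-SFT/trainer_state.json") as f:
    state = json.load(f)

# Keep only the per-step logging entries; the final summary record
# carries "train_loss" instead of "loss" and is skipped here.
logs = [entry for entry in state["log_history"] if "loss" in entry]

steps = [entry["step"] for entry in logs]
losses = [entry["loss"] for entry in logs]

plt.plot(steps, losses)
plt.xlabel("step")
plt.ylabel("training loss")
plt.title("Qwen3-VL-2B-SFT training loss")
plt.savefig("training_loss.png", dpi=150)
```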