rbelanec commited on
Commit
59d9ce0
·
verified ·
1 Parent(s): a7a04c4

Training in progress, step 600

Browse files
Files changed (2) hide show
  1. adapter_model.safetensors +1 -1
  2. trainer_log.jsonl +41 -0
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:99c19f874e3e87b13eed1ea4e97ab4ed6579cb65da69f6e46e9dd89916971fa4
3
  size 460928
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:358976bd9b25f69eebd758545e5efdcc448cf6549e260e12c62c9e3b03f5630c
3
  size 460928
trainer_log.jsonl CHANGED
@@ -80,3 +80,44 @@
80
  {"current_steps": 395, "total_steps": 40000, "loss": 1.0139, "lr": 0.29992818770376284, "epoch": 0.05057294667434863, "percentage": 0.99, "elapsed_time": "0:07:20", "remaining_time": "12:15:35", "throughput": 5236.82, "total_tokens": 2305152}
81
  {"current_steps": 400, "total_steps": 40000, "loss": 1.0969, "lr": 0.29992635364173725, "epoch": 0.05121311055630241, "percentage": 1.0, "elapsed_time": "0:07:23", "remaining_time": "12:11:14", "throughput": 5269.11, "total_tokens": 2335136}
82
  {"current_steps": 400, "total_steps": 40000, "eval_loss": 1.0986460447311401, "epoch": 0.05121311055630241, "percentage": 1.0, "elapsed_time": "0:10:44", "remaining_time": "17:42:45", "throughput": 3625.42, "total_tokens": 2335136}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
80
  {"current_steps": 395, "total_steps": 40000, "loss": 1.0139, "lr": 0.29992818770376284, "epoch": 0.05057294667434863, "percentage": 0.99, "elapsed_time": "0:07:20", "remaining_time": "12:15:35", "throughput": 5236.82, "total_tokens": 2305152}
81
  {"current_steps": 400, "total_steps": 40000, "loss": 1.0969, "lr": 0.29992635364173725, "epoch": 0.05121311055630241, "percentage": 1.0, "elapsed_time": "0:07:23", "remaining_time": "12:11:14", "throughput": 5269.11, "total_tokens": 2335136}
82
  {"current_steps": 400, "total_steps": 40000, "eval_loss": 1.0986460447311401, "epoch": 0.05121311055630241, "percentage": 1.0, "elapsed_time": "0:10:44", "remaining_time": "17:42:45", "throughput": 3625.42, "total_tokens": 2335136}
83
+ {"current_steps": 405, "total_steps": 40000, "loss": 1.2408, "lr": 0.2999244964591839, "epoch": 0.05185327443825619, "percentage": 1.01, "elapsed_time": "0:10:48", "remaining_time": "17:37:22", "throughput": 3643.65, "total_tokens": 2364448}
84
+ {"current_steps": 410, "total_steps": 40000, "loss": 1.1477, "lr": 0.2999226161563891, "epoch": 0.05249343832020997, "percentage": 1.03, "elapsed_time": "0:10:51", "remaining_time": "17:29:08", "throughput": 3672.09, "total_tokens": 2393856}
85
+ {"current_steps": 415, "total_steps": 40000, "loss": 1.0423, "lr": 0.2999207127336429, "epoch": 0.05313360220216375, "percentage": 1.04, "elapsed_time": "0:10:54", "remaining_time": "17:21:05", "throughput": 3699.84, "total_tokens": 2422912}
86
+ {"current_steps": 420, "total_steps": 40000, "loss": 1.0272, "lr": 0.2999187861912387, "epoch": 0.05377376608411753, "percentage": 1.05, "elapsed_time": "0:10:57", "remaining_time": "17:13:14", "throughput": 3728.01, "total_tokens": 2452480}
87
+ {"current_steps": 425, "total_steps": 40000, "loss": 1.1012, "lr": 0.2999168365294737, "epoch": 0.05441392996607131, "percentage": 1.06, "elapsed_time": "0:11:00", "remaining_time": "17:05:46", "throughput": 3757.38, "total_tokens": 2483456}
88
+ {"current_steps": 430, "total_steps": 40000, "loss": 1.2242, "lr": 0.29991486374864856, "epoch": 0.05505409384802509, "percentage": 1.07, "elapsed_time": "0:11:04", "remaining_time": "16:58:25", "throughput": 3786.74, "total_tokens": 2514464}
89
+ {"current_steps": 435, "total_steps": 40000, "loss": 1.0773, "lr": 0.29991286784906745, "epoch": 0.05569425772997887, "percentage": 1.09, "elapsed_time": "0:11:07", "remaining_time": "16:51:37", "throughput": 3812.5, "total_tokens": 2544224}
90
+ {"current_steps": 440, "total_steps": 40000, "loss": 1.1898, "lr": 0.2999108488310382, "epoch": 0.05633442161193265, "percentage": 1.1, "elapsed_time": "0:11:10", "remaining_time": "16:44:29", "throughput": 3839.28, "total_tokens": 2573600}
91
+ {"current_steps": 445, "total_steps": 40000, "loss": 1.1474, "lr": 0.29990880669487213, "epoch": 0.05697458549388643, "percentage": 1.11, "elapsed_time": "0:11:13", "remaining_time": "16:37:29", "throughput": 3866.02, "total_tokens": 2603040}
92
+ {"current_steps": 450, "total_steps": 40000, "loss": 1.0324, "lr": 0.29990674144088425, "epoch": 0.05761474937584021, "percentage": 1.12, "elapsed_time": "0:11:16", "remaining_time": "16:30:33", "throughput": 3891.31, "total_tokens": 2631456}
93
+ {"current_steps": 455, "total_steps": 40000, "loss": 1.0619, "lr": 0.299904653069393, "epoch": 0.05825491325779399, "percentage": 1.14, "elapsed_time": "0:11:19", "remaining_time": "16:23:48", "throughput": 3917.24, "total_tokens": 2660512}
94
+ {"current_steps": 460, "total_steps": 40000, "loss": 0.9969, "lr": 0.29990254158072044, "epoch": 0.05889507713974777, "percentage": 1.15, "elapsed_time": "0:11:22", "remaining_time": "16:17:14", "throughput": 3942.63, "total_tokens": 2689440}
95
+ {"current_steps": 465, "total_steps": 40000, "loss": 1.0444, "lr": 0.2999004069751921, "epoch": 0.05953524102170155, "percentage": 1.16, "elapsed_time": "0:11:25", "remaining_time": "16:10:48", "throughput": 3968.19, "total_tokens": 2718624}
96
+ {"current_steps": 470, "total_steps": 40000, "loss": 1.0846, "lr": 0.2998982492531373, "epoch": 0.06017540490365533, "percentage": 1.18, "elapsed_time": "0:11:28", "remaining_time": "16:04:31", "throughput": 3993.58, "total_tokens": 2747872}
97
+ {"current_steps": 475, "total_steps": 40000, "loss": 1.1216, "lr": 0.2998960684148887, "epoch": 0.06081556878560911, "percentage": 1.19, "elapsed_time": "0:11:31", "remaining_time": "15:58:19", "throughput": 4017.94, "total_tokens": 2776448}
98
+ {"current_steps": 480, "total_steps": 40000, "loss": 1.0923, "lr": 0.29989386446078264, "epoch": 0.06145573266756289, "percentage": 1.2, "elapsed_time": "0:11:33", "remaining_time": "15:52:17", "throughput": 4042.63, "total_tokens": 2805504}
99
+ {"current_steps": 485, "total_steps": 40000, "loss": 1.0878, "lr": 0.299891637391159, "epoch": 0.06209589654951667, "percentage": 1.21, "elapsed_time": "0:11:36", "remaining_time": "15:46:25", "throughput": 4067.6, "total_tokens": 2835008}
100
+ {"current_steps": 490, "total_steps": 40000, "loss": 0.9946, "lr": 0.2998893872063612, "epoch": 0.06273606043147045, "percentage": 1.23, "elapsed_time": "0:11:39", "remaining_time": "15:40:39", "throughput": 4092.4, "total_tokens": 2864512}
101
+ {"current_steps": 495, "total_steps": 40000, "loss": 1.0689, "lr": 0.2998871139067363, "epoch": 0.06337622431342424, "percentage": 1.24, "elapsed_time": "0:11:43", "remaining_time": "15:35:12", "throughput": 4118.65, "total_tokens": 2895776}
102
+ {"current_steps": 500, "total_steps": 40000, "loss": 1.1296, "lr": 0.2998848174926348, "epoch": 0.06401638819537801, "percentage": 1.25, "elapsed_time": "0:11:46", "remaining_time": "15:29:40", "throughput": 4143.03, "total_tokens": 2925344}
103
+ {"current_steps": 505, "total_steps": 40000, "loss": 0.8825, "lr": 0.2998824979644109, "epoch": 0.0646565520773318, "percentage": 1.26, "elapsed_time": "0:11:48", "remaining_time": "15:24:09", "throughput": 4165.76, "total_tokens": 2953504}
104
+ {"current_steps": 510, "total_steps": 40000, "loss": 1.1476, "lr": 0.29988015532242224, "epoch": 0.06529671595928557, "percentage": 1.27, "elapsed_time": "0:11:51", "remaining_time": "15:18:46", "throughput": 4189.1, "total_tokens": 2982400}
105
+ {"current_steps": 515, "total_steps": 40000, "loss": 1.1222, "lr": 0.29987778956703015, "epoch": 0.06593687984123936, "percentage": 1.29, "elapsed_time": "0:11:54", "remaining_time": "15:13:37", "throughput": 4213.68, "total_tokens": 3012704}
106
+ {"current_steps": 520, "total_steps": 40000, "loss": 1.0441, "lr": 0.2998754006985994, "epoch": 0.06657704372319313, "percentage": 1.3, "elapsed_time": "0:11:57", "remaining_time": "15:08:27", "throughput": 4236.69, "total_tokens": 3041664}
107
+ {"current_steps": 525, "total_steps": 40000, "loss": 0.9971, "lr": 0.29987298871749846, "epoch": 0.06721720760514692, "percentage": 1.31, "elapsed_time": "0:12:00", "remaining_time": "15:03:25", "throughput": 4259.56, "total_tokens": 3070752}
108
+ {"current_steps": 530, "total_steps": 40000, "loss": 1.068, "lr": 0.2998705536240992, "epoch": 0.06785737148710069, "percentage": 1.32, "elapsed_time": "0:12:03", "remaining_time": "14:58:31", "throughput": 4283.13, "total_tokens": 3100640}
109
+ {"current_steps": 535, "total_steps": 40000, "loss": 1.1588, "lr": 0.2998680954187772, "epoch": 0.06849753536905448, "percentage": 1.34, "elapsed_time": "0:12:06", "remaining_time": "14:53:41", "throughput": 4306.31, "total_tokens": 3130304}
110
+ {"current_steps": 540, "total_steps": 40000, "loss": 0.9421, "lr": 0.2998656141019115, "epoch": 0.06913769925100825, "percentage": 1.35, "elapsed_time": "0:12:09", "remaining_time": "14:48:51", "throughput": 4327.74, "total_tokens": 3158496}
111
+ {"current_steps": 545, "total_steps": 40000, "loss": 1.0863, "lr": 0.2998631096738848, "epoch": 0.06977786313296204, "percentage": 1.36, "elapsed_time": "0:12:12", "remaining_time": "14:44:09", "throughput": 4349.66, "total_tokens": 3187328}
112
+ {"current_steps": 550, "total_steps": 40000, "loss": 1.2508, "lr": 0.29986058213508326, "epoch": 0.07041802701491581, "percentage": 1.38, "elapsed_time": "0:12:15", "remaining_time": "14:39:31", "throughput": 4371.02, "total_tokens": 3215840}
113
+ {"current_steps": 555, "total_steps": 40000, "loss": 0.9364, "lr": 0.29985803148589674, "epoch": 0.0710581908968696, "percentage": 1.39, "elapsed_time": "0:12:18", "remaining_time": "14:35:00", "throughput": 4393.33, "total_tokens": 3245344}
114
+ {"current_steps": 560, "total_steps": 40000, "loss": 1.0526, "lr": 0.2998554577267185, "epoch": 0.07169835477882337, "percentage": 1.4, "elapsed_time": "0:12:21", "remaining_time": "14:30:33", "throughput": 4415.24, "total_tokens": 3274592}
115
+ {"current_steps": 565, "total_steps": 40000, "loss": 0.9364, "lr": 0.2998528608579455, "epoch": 0.07233851866077716, "percentage": 1.41, "elapsed_time": "0:12:24", "remaining_time": "14:26:09", "throughput": 4436.22, "total_tokens": 3303136}
116
+ {"current_steps": 570, "total_steps": 40000, "loss": 1.0474, "lr": 0.2998502408799781, "epoch": 0.07297868254273093, "percentage": 1.43, "elapsed_time": "0:12:27", "remaining_time": "14:21:53", "throughput": 4458.06, "total_tokens": 3332704}
117
+ {"current_steps": 575, "total_steps": 40000, "loss": 1.0375, "lr": 0.2998475977932205, "epoch": 0.07361884642468472, "percentage": 1.44, "elapsed_time": "0:12:30", "remaining_time": "14:17:43", "throughput": 4480.39, "total_tokens": 3362880}
118
+ {"current_steps": 580, "total_steps": 40000, "loss": 1.0328, "lr": 0.29984493159808023, "epoch": 0.07425901030663849, "percentage": 1.45, "elapsed_time": "0:12:33", "remaining_time": "14:13:38", "throughput": 4502.63, "total_tokens": 3393184}
119
+ {"current_steps": 585, "total_steps": 40000, "loss": 0.9672, "lr": 0.29984224229496836, "epoch": 0.07489917418859228, "percentage": 1.46, "elapsed_time": "0:12:36", "remaining_time": "14:09:31", "throughput": 4522.85, "total_tokens": 3421664}
120
+ {"current_steps": 590, "total_steps": 40000, "loss": 0.9533, "lr": 0.2998395298842998, "epoch": 0.07553933807054607, "percentage": 1.47, "elapsed_time": "0:12:39", "remaining_time": "14:05:31", "throughput": 4543.44, "total_tokens": 3450688}
121
+ {"current_steps": 595, "total_steps": 40000, "loss": 1.0442, "lr": 0.29983679436649263, "epoch": 0.07617950195249984, "percentage": 1.49, "elapsed_time": "0:12:42", "remaining_time": "14:01:45", "throughput": 4565.78, "total_tokens": 3481952}
122
+ {"current_steps": 600, "total_steps": 40000, "loss": 1.1743, "lr": 0.2998340357419689, "epoch": 0.07681966583445363, "percentage": 1.5, "elapsed_time": "0:12:45", "remaining_time": "13:57:57", "throughput": 4586.59, "total_tokens": 3511712}
123
+ {"current_steps": 600, "total_steps": 40000, "eval_loss": 1.051778793334961, "epoch": 0.07681966583445363, "percentage": 1.5, "elapsed_time": "0:16:06", "remaining_time": "17:37:35", "throughput": 3634.09, "total_tokens": 3511712}