rbelanec commited on
Commit
b4e4820
·
verified ·
1 Parent(s): 03cab96

Training in progress, step 600

Browse files
Files changed (2) hide show
  1. adapter_model.safetensors +1 -1
  2. trainer_log.jsonl +41 -0
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:683d1769b7f29d12d2b397a6a77046f22b978bfe987c0dabeac1bd7df2fcf543
3
  size 18124968
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:27a781b1f8e84e189944f48f79ebf50782f956b9741f89356f14e0cecbba70d5
3
  size 18124968
trainer_log.jsonl CHANGED
@@ -80,3 +80,44 @@
80
  {"current_steps": 395, "total_steps": 40000, "loss": 0.6311, "lr": 4.998803128396047e-05, "epoch": 0.05057294667434863, "percentage": 0.99, "elapsed_time": "0:09:24", "remaining_time": "15:42:39", "throughput": 4086.43, "total_tokens": 2305152}
81
  {"current_steps": 400, "total_steps": 40000, "loss": 0.7267, "lr": 4.9987725606956215e-05, "epoch": 0.05121311055630241, "percentage": 1.0, "elapsed_time": "0:09:28", "remaining_time": "15:37:45", "throughput": 4108.68, "total_tokens": 2335136}
82
  {"current_steps": 400, "total_steps": 40000, "eval_loss": 0.6809284687042236, "epoch": 0.05121311055630241, "percentage": 1.0, "elapsed_time": "0:13:12", "remaining_time": "21:47:57", "throughput": 2945.81, "total_tokens": 2335136}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
80
  {"current_steps": 395, "total_steps": 40000, "loss": 0.6311, "lr": 4.998803128396047e-05, "epoch": 0.05057294667434863, "percentage": 0.99, "elapsed_time": "0:09:24", "remaining_time": "15:42:39", "throughput": 4086.43, "total_tokens": 2305152}
81
  {"current_steps": 400, "total_steps": 40000, "loss": 0.7267, "lr": 4.9987725606956215e-05, "epoch": 0.05121311055630241, "percentage": 1.0, "elapsed_time": "0:09:28", "remaining_time": "15:37:45", "throughput": 4108.68, "total_tokens": 2335136}
82
  {"current_steps": 400, "total_steps": 40000, "eval_loss": 0.6809284687042236, "epoch": 0.05121311055630241, "percentage": 1.0, "elapsed_time": "0:13:12", "remaining_time": "21:47:57", "throughput": 2945.81, "total_tokens": 2335136}
83
+ {"current_steps": 405, "total_steps": 40000, "loss": 0.7494, "lr": 4.998741607653066e-05, "epoch": 0.05185327443825619, "percentage": 1.01, "elapsed_time": "0:13:18", "remaining_time": "21:41:15", "throughput": 2960.74, "total_tokens": 2364448}
84
+ {"current_steps": 410, "total_steps": 40000, "loss": 0.7064, "lr": 4.9987102692731523e-05, "epoch": 0.05249343832020997, "percentage": 1.03, "elapsed_time": "0:13:22", "remaining_time": "21:32:01", "throughput": 2981.81, "total_tokens": 2393856}
85
+ {"current_steps": 415, "total_steps": 40000, "loss": 0.6321, "lr": 4.9986785455607157e-05, "epoch": 0.05313360220216375, "percentage": 1.04, "elapsed_time": "0:13:27", "remaining_time": "21:22:59", "throughput": 3002.25, "total_tokens": 2422912}
86
+ {"current_steps": 420, "total_steps": 40000, "loss": 0.66, "lr": 4.9986464365206456e-05, "epoch": 0.05377376608411753, "percentage": 1.05, "elapsed_time": "0:13:31", "remaining_time": "21:14:13", "throughput": 3022.96, "total_tokens": 2452480}
87
+ {"current_steps": 425, "total_steps": 40000, "loss": 0.7425, "lr": 4.9986139421578956e-05, "epoch": 0.05441392996607131, "percentage": 1.06, "elapsed_time": "0:13:35", "remaining_time": "21:05:46", "throughput": 3044.94, "total_tokens": 2483456}
88
+ {"current_steps": 430, "total_steps": 40000, "loss": 0.7295, "lr": 4.998581062477477e-05, "epoch": 0.05505409384802509, "percentage": 1.07, "elapsed_time": "0:13:39", "remaining_time": "20:57:28", "throughput": 3066.85, "total_tokens": 2514464}
89
+ {"current_steps": 435, "total_steps": 40000, "loss": 0.6757, "lr": 4.998547797484458e-05, "epoch": 0.05569425772997887, "percentage": 1.09, "elapsed_time": "0:13:44", "remaining_time": "20:49:17", "throughput": 3087.2, "total_tokens": 2544224}
90
+ {"current_steps": 440, "total_steps": 40000, "loss": 0.7157, "lr": 4.9985141471839706e-05, "epoch": 0.05633442161193265, "percentage": 1.1, "elapsed_time": "0:13:48", "remaining_time": "20:41:16", "throughput": 3106.88, "total_tokens": 2573600}
91
+ {"current_steps": 445, "total_steps": 40000, "loss": 0.6481, "lr": 4.998480111581203e-05, "epoch": 0.05697458549388643, "percentage": 1.11, "elapsed_time": "0:13:52", "remaining_time": "20:33:26", "throughput": 3126.45, "total_tokens": 2603040}
92
+ {"current_steps": 450, "total_steps": 40000, "loss": 0.6413, "lr": 4.998445690681405e-05, "epoch": 0.05761474937584021, "percentage": 1.12, "elapsed_time": "0:13:56", "remaining_time": "20:25:43", "throughput": 3144.74, "total_tokens": 2631456}
93
+ {"current_steps": 455, "total_steps": 40000, "loss": 0.6498, "lr": 4.9984108844898834e-05, "epoch": 0.05825491325779399, "percentage": 1.14, "elapsed_time": "0:14:01", "remaining_time": "20:18:13", "throughput": 3163.49, "total_tokens": 2660512}
94
+ {"current_steps": 460, "total_steps": 40000, "loss": 0.6828, "lr": 4.9983756930120076e-05, "epoch": 0.05889507713974777, "percentage": 1.15, "elapsed_time": "0:14:05", "remaining_time": "20:10:51", "throughput": 3181.95, "total_tokens": 2689440}
95
+ {"current_steps": 465, "total_steps": 40000, "loss": 0.6701, "lr": 4.9983401162532025e-05, "epoch": 0.05953524102170155, "percentage": 1.16, "elapsed_time": "0:14:09", "remaining_time": "20:03:40", "throughput": 3200.49, "total_tokens": 2718624}
96
+ {"current_steps": 470, "total_steps": 40000, "loss": 0.6111, "lr": 4.998304154218955e-05, "epoch": 0.06017540490365533, "percentage": 1.18, "elapsed_time": "0:14:13", "remaining_time": "19:56:38", "throughput": 3218.93, "total_tokens": 2747872}
97
+ {"current_steps": 475, "total_steps": 40000, "loss": 0.6352, "lr": 4.998267806914812e-05, "epoch": 0.06081556878560911, "percentage": 1.19, "elapsed_time": "0:14:17", "remaining_time": "19:49:45", "throughput": 3236.37, "total_tokens": 2776448}
98
+ {"current_steps": 480, "total_steps": 40000, "loss": 0.5648, "lr": 4.998231074346378e-05, "epoch": 0.06145573266756289, "percentage": 1.2, "elapsed_time": "0:14:22", "remaining_time": "19:42:59", "throughput": 3254.29, "total_tokens": 2805504}
99
+ {"current_steps": 485, "total_steps": 40000, "loss": 0.7029, "lr": 4.998193956519317e-05, "epoch": 0.06209589654951667, "percentage": 1.21, "elapsed_time": "0:14:26", "remaining_time": "19:36:22", "throughput": 3272.48, "total_tokens": 2835008}
100
+ {"current_steps": 490, "total_steps": 40000, "loss": 0.6431, "lr": 4.9981564534393545e-05, "epoch": 0.06273606043147045, "percentage": 1.23, "elapsed_time": "0:14:30", "remaining_time": "19:29:55", "throughput": 3290.43, "total_tokens": 2864512}
101
+ {"current_steps": 495, "total_steps": 40000, "loss": 0.6465, "lr": 4.998118565112272e-05, "epoch": 0.06337622431342424, "percentage": 1.24, "elapsed_time": "0:14:34", "remaining_time": "19:23:51", "throughput": 3309.48, "total_tokens": 2895776}
102
+ {"current_steps": 500, "total_steps": 40000, "loss": 0.6654, "lr": 4.998080291543914e-05, "epoch": 0.06401638819537801, "percentage": 1.25, "elapsed_time": "0:14:39", "remaining_time": "19:17:39", "throughput": 3327.15, "total_tokens": 2925344}
103
+ {"current_steps": 505, "total_steps": 40000, "loss": 0.6738, "lr": 4.9980416327401826e-05, "epoch": 0.0646565520773318, "percentage": 1.26, "elapsed_time": "0:14:43", "remaining_time": "19:11:31", "throughput": 3343.2, "total_tokens": 2953504}
104
+ {"current_steps": 510, "total_steps": 40000, "loss": 0.7437, "lr": 4.998002588707038e-05, "epoch": 0.06529671595928557, "percentage": 1.27, "elapsed_time": "0:14:47", "remaining_time": "19:05:33", "throughput": 3359.81, "total_tokens": 2982400}
105
+ {"current_steps": 515, "total_steps": 40000, "loss": 0.7317, "lr": 4.997963159450503e-05, "epoch": 0.06593687984123936, "percentage": 1.29, "elapsed_time": "0:14:51", "remaining_time": "18:59:43", "throughput": 3377.78, "total_tokens": 3012704}
106
+ {"current_steps": 520, "total_steps": 40000, "loss": 0.6243, "lr": 4.9979233449766575e-05, "epoch": 0.06657704372319313, "percentage": 1.3, "elapsed_time": "0:14:56", "remaining_time": "18:53:57", "throughput": 3394.2, "total_tokens": 3041664}
107
+ {"current_steps": 525, "total_steps": 40000, "loss": 0.6336, "lr": 4.997883145291641e-05, "epoch": 0.06721720760514692, "percentage": 1.31, "elapsed_time": "0:15:00", "remaining_time": "18:48:18", "throughput": 3410.59, "total_tokens": 3070752}
108
+ {"current_steps": 530, "total_steps": 40000, "loss": 0.6422, "lr": 4.9978425604016536e-05, "epoch": 0.06785737148710069, "percentage": 1.32, "elapsed_time": "0:15:04", "remaining_time": "18:42:46", "throughput": 3427.67, "total_tokens": 3100640}
109
+ {"current_steps": 535, "total_steps": 40000, "loss": 0.6761, "lr": 4.9978015903129536e-05, "epoch": 0.06849753536905448, "percentage": 1.34, "elapsed_time": "0:15:08", "remaining_time": "18:37:20", "throughput": 3444.35, "total_tokens": 3130304}
110
+ {"current_steps": 540, "total_steps": 40000, "loss": 0.5755, "lr": 4.997760235031859e-05, "epoch": 0.06913769925100825, "percentage": 1.35, "elapsed_time": "0:15:13", "remaining_time": "18:31:57", "throughput": 3459.42, "total_tokens": 3158496}
111
+ {"current_steps": 545, "total_steps": 40000, "loss": 0.7194, "lr": 4.9977184945647473e-05, "epoch": 0.06977786313296204, "percentage": 1.36, "elapsed_time": "0:15:17", "remaining_time": "18:26:42", "throughput": 3474.93, "total_tokens": 3187328}
112
+ {"current_steps": 550, "total_steps": 40000, "loss": 0.7499, "lr": 4.997676368918055e-05, "epoch": 0.07041802701491581, "percentage": 1.38, "elapsed_time": "0:15:21", "remaining_time": "18:21:32", "throughput": 3490.0, "total_tokens": 3215840}
113
+ {"current_steps": 555, "total_steps": 40000, "loss": 0.6253, "lr": 4.9976338580982794e-05, "epoch": 0.0710581908968696, "percentage": 1.39, "elapsed_time": "0:15:25", "remaining_time": "18:16:28", "throughput": 3505.96, "total_tokens": 3245344}
114
+ {"current_steps": 560, "total_steps": 40000, "loss": 0.6416, "lr": 4.9975909621119755e-05, "epoch": 0.07169835477882337, "percentage": 1.4, "elapsed_time": "0:15:29", "remaining_time": "18:11:31", "throughput": 3521.46, "total_tokens": 3274592}
115
+ {"current_steps": 565, "total_steps": 40000, "loss": 0.6401, "lr": 4.997547680965758e-05, "epoch": 0.07233851866077716, "percentage": 1.41, "elapsed_time": "0:15:34", "remaining_time": "18:06:36", "throughput": 3536.18, "total_tokens": 3303136}
116
+ {"current_steps": 570, "total_steps": 40000, "loss": 0.6313, "lr": 4.997504014666302e-05, "epoch": 0.07297868254273093, "percentage": 1.43, "elapsed_time": "0:15:38", "remaining_time": "18:01:48", "throughput": 3551.8, "total_tokens": 3332704}
117
+ {"current_steps": 575, "total_steps": 40000, "loss": 0.7688, "lr": 4.997459963220342e-05, "epoch": 0.07361884642468472, "percentage": 1.44, "elapsed_time": "0:15:42", "remaining_time": "17:57:05", "throughput": 3567.87, "total_tokens": 3362880}
118
+ {"current_steps": 580, "total_steps": 40000, "loss": 0.6383, "lr": 4.997415526634671e-05, "epoch": 0.07425901030663849, "percentage": 1.45, "elapsed_time": "0:15:46", "remaining_time": "17:52:33", "throughput": 3583.63, "total_tokens": 3393184}
119
+ {"current_steps": 585, "total_steps": 40000, "loss": 0.5934, "lr": 4.99737070491614e-05, "epoch": 0.07489917418859228, "percentage": 1.46, "elapsed_time": "0:15:51", "remaining_time": "17:47:58", "throughput": 3597.77, "total_tokens": 3421664}
120
+ {"current_steps": 590, "total_steps": 40000, "loss": 0.6136, "lr": 4.997325498071663e-05, "epoch": 0.07553933807054607, "percentage": 1.47, "elapsed_time": "0:15:55", "remaining_time": "17:43:27", "throughput": 3612.32, "total_tokens": 3450688}
121
+ {"current_steps": 595, "total_steps": 40000, "loss": 0.6038, "lr": 4.997279906108211e-05, "epoch": 0.07617950195249984, "percentage": 1.49, "elapsed_time": "0:15:59", "remaining_time": "17:39:18", "throughput": 3628.15, "total_tokens": 3481952}
122
+ {"current_steps": 600, "total_steps": 40000, "loss": 0.7324, "lr": 4.9972339290328155e-05, "epoch": 0.07681966583445363, "percentage": 1.5, "elapsed_time": "0:16:03", "remaining_time": "17:35:00", "throughput": 3642.95, "total_tokens": 3511712}
123
+ {"current_steps": 600, "total_steps": 40000, "eval_loss": 0.6449815034866333, "epoch": 0.07681966583445363, "percentage": 1.5, "elapsed_time": "0:19:48", "remaining_time": "21:40:53", "throughput": 2954.4, "total_tokens": 3511712}